diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/README.md b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/README.md new file mode 100644 index 000000000..6d64508b4 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/README.md @@ -0,0 +1,38 @@ +``` +================= +Benchmark results +================= + fail n perf sem% std% peak_memory score weight +bert-fp16 0 1 149.82 0.1% 0.4% 24616 149.818434 0.00 +bert-fp32 0 1 27.22 0.0% 0.1% 31580 27.217099 0.00 +bert-tf32 0 1 116.65 0.1% 0.5% 31582 116.647454 0.00 +bert-tf32-fp16 0 1 150.16 0.1% 0.4% 24616 150.156416 3.00 +bf16 0 1 270.83 0.1% 0.8% 1804 270.832982 0.00 +convnext_large-fp16 0 1 310.32 1.9% 10.0% 27478 310.322641 0.00 +convnext_large-fp32 0 1 42.56 2.2% 11.7% 49598 42.558568 0.00 +convnext_large-tf32 0 1 124.27 4.0% 21.4% 49598 124.274883 0.00 +convnext_large-tf32-fp16 0 1 309.84 1.6% 8.5% 27478 309.838750 3.00 +davit_large 0 1 290.43 0.7% 5.6% 34016 290.434243 1.00 +davit_large-multi 0 1 290.77 0.7% 5.4% 34260 290.773492 5.00 +dlrm 0 1 418230.56 0.1% 0.4% 7120 418230.564140 1.00 +focalnet 0 1 381.31 0.5% 3.5% 25794 381.313064 2.00 +fp16 0 1 252.71 0.1% 0.5% 1804 252.706435 0.00 +fp32 0 1 18.93 0.0% 0.3% 2182 18.931403 0.00 +llama 0 1 453.97 11.5% 71.6% 28442 453.966564 1.00 +reformer 0 1 55.60 0.0% 0.2% 25420 55.599417 1.00 +regnet_y_128gf 0 1 78.11 0.9% 6.5% 31570 78.106848 2.00 +resnet152 0 1 637.53 0.8% 5.9% 35958 637.529996 1.00 +resnet152-multi 0 1 638.26 0.7% 5.6% 35422 638.255791 5.00 +resnet50 0 1 1013.93 1.6% 12.6% 4746 1013.930919 1.00 +rwkv 1 1 NaN NaN NaN 1574 NaN 1.00 +stargan 0 1 38.18 3.0% 22.9% 37442 38.177830 1.00 +super-slomo 0 1 41.83 1.2% 9.6% 33816 41.828308 1.00 +t5 0 1 46.26 0.6% 4.3% 35460 46.264485 2.00 +tf32 0 1 133.34 0.0% 0.4% 2182 133.344138 0.00 +whisper 0 1 214.83 0.1% 0.5% 36740 214.828130 1.00 + +Scores +------ +Failure rate: 3.70% (FAIL) +Score: 245.06 +``` diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/badge.svg b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/badge.svg new file mode 100644 index 000000000..544903e36 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/badge.svg @@ -0,0 +1 @@ +NVIDIA_A100_80GB_PCIeNVIDIA_A100_80GB_PCIepartialpartial \ No newline at end of file diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp16.D0.data new file mode 100644 index 000000000..53b26543f --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp16.D0.data @@ -0,0 +1,445 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.551, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077901.108502, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077901.124953}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.97, "temperature": 72, "power": 312.497}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 262.986}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.87724257824746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 346.639}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.74140036061542, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.99908125562732, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.12722221809477, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 311.874}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.01328752070046, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.35282621139308, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.2284956640488, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.97, "temperature": 74, "power": 272.101}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.71168850521985, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.1134768755302, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.6994876814785, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 343.577}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.679308993346, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.81116457198465, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.0723919204146, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 314.376}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.99273955456914, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.66613708208294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.6258571309043, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 261.322}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.75822825712604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.1353529309938, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.50298880675425, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 283.842}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.4349718482305, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.06292987443084, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 330.688}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.64640154660455, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.42670376474726, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.52963472130756, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 274.318}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.53146036506402, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.7045908133323, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.36469133502277, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 77, "power": 247.265}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.29060277984328, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.399675175413, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.32016976490075, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 332.917}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.36037534792553, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 290.907}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077943.0068586, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp32.D0.data new file mode 100644 index 000000000..69c6cb768 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-fp32.D0.data @@ -0,0 +1,228 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 74, "power": 106.843, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077836.003601, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077836.0194106}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479292869567871}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29670.375, 81920.0], "load": 1.0, "temperature": 75, "power": 288.505}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323901176452637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044936180114746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.83090591430664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708606719970703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 75, "power": 275.217}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565232276916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491073608398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468774795532227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.4805908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 76, "power": 311.923}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.254101107707164, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515676498413086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.562061309814453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.278152347016363, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608406066894531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.085}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640921592712402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.326843910432217, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.648000717163086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.63291072845459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.341786224266176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614981651306152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 300.483}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622065544128418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.297388291665634, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669747352600098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752553939819336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.274687245700036, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.928}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848938941955566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.93950080871582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.246857225515352, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007094383239746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.038527488708496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.26535445752342, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.016993522644043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 285.573}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.91185188293457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.215620658582264, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.875959396362305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.918696403503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.226982677684816, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 293.654}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.017876625061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.246811866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.19422325556739, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.431685447692871}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.560530662536621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.212896789111443, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 308.917}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.64004135131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.689648628234863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.18328096857228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.736746788024902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.803998947143555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.230739313874587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.953}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.90011978149414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.020580291748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.202090688085324, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.152454376220703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.279325485229492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.19911093178133, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.385612487792969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 307.212}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.46435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.15907313311105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.513260841369629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.550541877746582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.186907108381124, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.515}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.602174758911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.679553985595703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.187766579635632, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.78598690032959}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.926142692565918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.20186679611355, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.089705467224121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.9}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.263202667236328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.193978093426953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.435498237609863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.600459098815918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.169159409590325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.643}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.752622604370117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.883284568786621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.188029404963412, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.991811752319336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.081356048583984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.19309180509355, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.951}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.160579681396484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.241079330444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.224704317420887, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.328137397766113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.415624618530273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.180507871153157, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 301.428}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.503317832946777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.589831352233887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.182520064371573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.672152519226074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.75645637512207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.187741851956112, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.884}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.840280532836914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.907486915588379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.19052326449445, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.961742401123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.999088287353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.17834205619532, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 292.988}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.025399208068848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.040262222290039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.222413323563753, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.773}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077898.4381008, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32-fp16.D0.data new file mode 100644 index 000000000..d6d1bc587 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32-fp16.D0.data @@ -0,0 +1,445 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.582, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077990.754599, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077990.7707229}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 307.221}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 309.24}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.95751583105746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 250.925}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 153.1054651219453, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.57620004980626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.71340498561378, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 255.027}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.53156811074192, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.6244679747852, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.9950361955178, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 257.893}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.60892610090096, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.8980747897913, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.78665044826744, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 279.627}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.7427044109675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.79841893592229, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.5930821337377, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 318.696}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.58623167486624, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.6818004880261, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.66072264942693, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 249.228}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.1716942075641, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.71687028624214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 302.024}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.22178843427483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.38647792765164, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.32605505247176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 300.199}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.50163474123144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.25221110778236, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.13450818014954, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 77, "power": 335.337}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.06195396868884, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.7410177353864, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.3987521174899, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 77, "power": 280.502}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.54771507550964, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.4572048403527, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.62833744853714, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 280.599}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.76659636360603, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 76, "power": 295.533}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712078032.751831, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32.D0.data new file mode 100644 index 000000000..dd763a609 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bert-tf32.D0.data @@ -0,0 +1,373 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 104.985, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077945.65208, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077945.6682127}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479286193847656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323932647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044816970825195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.830974578857422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 73, "power": 256.009}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708649635314941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565240859985352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491065979003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468748092651367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.480533599853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515584945678711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.561928749084473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608244895935059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640739440917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.647823333740234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.632768630981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 74, "power": 320.503}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614849090576172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.621868133544922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669413566589355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752106666564941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848526000976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.939350128173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007367134094238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.03938102722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.01879596710205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.70462377422068, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.915424346923828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.861414909362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.006791114807129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 74, "power": 300.431}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.027995109558105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.58564447113356, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.285082817077637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.481186866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.615607261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.69710636138916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.39533726862513, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.744179725646973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.784246444702148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.841986656188965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.928215026855469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.9827451274002, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.041173934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.170112609863281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 75, "power": 322.444}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.299582481384277}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.414661407470703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.51154639482996, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.506692886352539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.572625160217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.624845504760742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.687198638916016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.41696221151335, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.77263069152832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.880777359008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.014769554138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.169602394104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.64782046459099, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.335892677307129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.508112907409668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.768}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.685002326965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.864999771118164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.46253189897377, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.042545318603516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.21193790435791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.368173599243164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.505072593688965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.37462078541034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.626041412353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.737335205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.839637756347656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.941758155822754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.93688403764692, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.044066429138184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 291.007}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.145570755004883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.23983097076416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.333688735961914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.18791355192967, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.425797462463379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.513972282409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.597168922424316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.675834655761719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.15515117232664, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.757519721984863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.833535194396973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.900604248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.956380844116211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.30099918295276, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.990116119384766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 310.928}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.019303321838379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.04137897491455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.049532890319824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.28659638781434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.041813850402832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.022936820983887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.001933097839355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.978167533874512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.40177699316514, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.95536994934082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.924501419067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.874979019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.806092262268066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 76, "power": 251.316}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.4368992015761, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.719812393188477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.609824180603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.481300354003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.328900337219238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.25418389498418, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.169635772705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.010271072387695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.85876178741455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.748523712158203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.68765018092856, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.666166305541992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.625612258911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.650134086608887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 306.155}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.727069854736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.18837317400578, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.84864330291748}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.988987922668457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.1289644241333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.244298934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.08986301166708, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.312968254089355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.326717376708984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.288104057312012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.236703872680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.1493113189736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.258355140686035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.536263465881348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.127537727355957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 76, "power": 309.656}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.74303913116455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.12458767141415, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.15192413330078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.16707420349121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.868474960327148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.518776893615723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.23488520032957, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.283734321594238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.241960525512695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.301491737365723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.411982536315918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.40753523254803, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.539347648620605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.672064781188965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 77, "power": 287.576}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.800228118896484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.920104026794434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.72236355410804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.03019905090332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.13632583618164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.23299217224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.31147003173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.97151478235925, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.37289810180664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.41785430908203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.443866729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45807647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.68319859829897, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.459491729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45636749267578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 76, "power": 321.38}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.44569206237793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.440540313720703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.6127997525185, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.448402404785156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.4744815826416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.53107261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.61451530456543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.09120963152789, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.71721076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.82810401916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.940759658813477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.044357299804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.18762217616587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.134565353393555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 77, "power": 270.625}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.20937156677246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.272945404052734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.317352294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.97527645220266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 77, "power": 282.746}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712077987.9582925, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bf16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bf16.D0.data new file mode 100644 index 000000000..4c2277b92 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/bf16.D0.data @@ -0,0 +1,110 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 100.567, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077272.967851, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712077272.9776273}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 187.68587817605857, "units": "Tflops", "t": 1712077274.8174868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 67, "power": 63.05}}, "t": 1712077274.3348703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.80446071776925, "units": "Tflops", "t": 1712077274.8981118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0.13, "temperature": 71, "power": 94.774}}, "t": 1712077274.842277}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.7718821433681, "units": "Tflops", "t": 1712077274.978844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.75010311196337, "units": "Tflops", "t": 1712077275.0595205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.6452897749511, "units": "Tflops", "t": 1712077275.1402483}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.6541554759143, "units": "Tflops", "t": 1712077275.2209487}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.62433677353187, "units": "Tflops", "t": 1712077275.3016598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.77914259343487, "units": "Tflops", "t": 1712077275.3823252}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 301.556}}, "t": 1712077275.351975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.6888178399463, "units": "Tflops", "t": 1712077275.4630694}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.621919326757, "units": "Tflops", "t": 1712077275.5437965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.35058367709536, "units": "Tflops", "t": 1712077275.6254904}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 262.6760962961053, "units": "Tflops", "t": 1712077275.7092562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.5021785805088, "units": "Tflops", "t": 1712077275.791219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.5420242673793, "units": "Tflops", "t": 1712077275.8716674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.341}}, "t": 1712077275.8590279}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.6865042612061, "units": "Tflops", "t": 1712077275.952117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.74848998872665, "units": "Tflops", "t": 1712077276.0327904}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.6275601025904, "units": "Tflops", "t": 1712077276.113502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.0651740076757, "units": "Tflops", "t": 1712077276.1946776}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.2906771164719, "units": "Tflops", "t": 1712077276.2760918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.9043240527408, "units": "Tflops", "t": 1712077276.3564458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.264855547138, "units": "Tflops", "t": 1712077276.4369738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.429}}, "t": 1712077276.3671496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.39385284716394, "units": "Tflops", "t": 1712077276.518722}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.80972420036323, "units": "Tflops", "t": 1712077276.601202}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.12534009044884, "units": "Tflops", "t": 1712077276.6838827}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.47456611901737, "units": "Tflops", "t": 1712077276.7652342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.4795717504233, "units": "Tflops", "t": 1712077276.8456998}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.7154251668301, "units": "Tflops", "t": 1712077276.9263902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 301.252}}, "t": 1712077276.8754086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.7694620792332, "units": "Tflops", "t": 1712077277.0071268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.49466102342467, "units": "Tflops", "t": 1712077277.0878875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.0237554759334, "units": "Tflops", "t": 1712077277.1690764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.78266440964177, "units": "Tflops", "t": 1712077277.2503355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.2638056019428, "units": "Tflops", "t": 1712077277.3311536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.3498279052019, "units": "Tflops", "t": 1712077277.411946}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 303.218}}, "t": 1712077277.3833127}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.4387101132507, "units": "Tflops", "t": 1712077277.4936788}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.6238462141339, "units": "Tflops", "t": 1712077277.576205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.5799212360733, "units": "Tflops", "t": 1712077277.658751}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.0880550770809, "units": "Tflops", "t": 1712077277.740217}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.7355856897148, "units": "Tflops", "t": 1712077277.8209062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.7718821433681, "units": "Tflops", "t": 1712077277.9015794}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.64}}, "t": 1712077277.8922937}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.8017331322509, "units": "Tflops", "t": 1712077277.9823017}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.7533294156807, "units": "Tflops", "t": 1712077278.0629835}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.15615334478537, "units": "Tflops", "t": 1712077278.1438406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.5142929693233, "units": "Tflops", "t": 1712077278.2254796}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.9034251685443, "units": "Tflops", "t": 1712077278.3064091}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.34108922901333, "units": "Tflops", "t": 1712077278.3874996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.6293232163255, "units": "Tflops", "t": 1712077278.4691033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.713}}, "t": 1712077278.3993769}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.44438452341444, "units": "Tflops", "t": 1712077278.5517325}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.09018778827016, "units": "Tflops", "t": 1712077278.6341162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.70501306669325, "units": "Tflops", "t": 1712077278.7156987}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.85499706698704, "units": "Tflops", "t": 1712077278.7963476}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.8824441817637, "units": "Tflops", "t": 1712077278.8769922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.792051014604, "units": "Tflops", "t": 1712077278.9576643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.585}}, "t": 1712077278.90737}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.8283624999564, "units": "Tflops", "t": 1712077279.038381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.8986158538173, "units": "Tflops", "t": 1712077279.119315}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.47126714411195, "units": "Tflops", "t": 1712077279.2003756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.33266420236396, "units": "Tflops", "t": 1712077279.2817771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.6088311922987, "units": "Tflops", "t": 1712077279.3633924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.35923639911266, "units": "Tflops", "t": 1712077279.44508}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.085}}, "t": 1712077279.4144542}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.33666960785365, "units": "Tflops", "t": 1712077279.5277438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.522465320497, "units": "Tflops", "t": 1712077279.609996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.0018745932359, "units": "Tflops", "t": 1712077279.6914887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.81243986506684, "units": "Tflops", "t": 1712077279.7718554}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.2613945448012, "units": "Tflops", "t": 1712077279.8526797}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.80492546352593, "units": "Tflops", "t": 1712077279.933939}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.276}}, "t": 1712077279.9263098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.2822917921485, "units": "Tflops", "t": 1712077280.0148227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.6975784999875, "units": "Tflops", "t": 1712077280.0958183}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.6231660169563, "units": "Tflops", "t": 1712077280.1768546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.4851570551768, "units": "Tflops", "t": 1712077280.2585075}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.55761475696886, "units": "Tflops", "t": 1712077280.3401432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.7995301182861, "units": "Tflops", "t": 1712077280.4220085}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.2113683472868, "units": "Tflops", "t": 1712077280.5046582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.878}}, "t": 1712077280.434266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.69716689706064, "units": "Tflops", "t": 1712077280.5869045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.3588136951683, "units": "Tflops", "t": 1712077280.6683087}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.8025400064117, "units": "Tflops", "t": 1712077280.7489872}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.6904302574172, "units": "Tflops", "t": 1712077280.8296874}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.76865540073095, "units": "Tflops", "t": 1712077280.9103656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.86306917463287, "units": "Tflops", "t": 1712077280.991007}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 305.73}}, "t": 1712077280.945368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.8633527435714, "units": "Tflops", "t": 1712077281.0720165}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.5591903571311, "units": "Tflops", "t": 1712077281.153654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.537921308937, "units": "Tflops", "t": 1712077281.2352865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.5544636119009, "units": "Tflops", "t": 1712077281.3169227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.46857233499077, "units": "Tflops", "t": 1712077281.3988976}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.2565807968629, "units": "Tflops", "t": 1712077281.4812365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.944}}, "t": 1712077281.452501}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.03823763035194, "units": "Tflops", "t": 1712077281.5627854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.53870899160927, "units": "Tflops", "t": 1712077281.6444361}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.01610244139914, "units": "Tflops", "t": 1712077281.7259264}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.35161388433784, "units": "Tflops", "t": 1712077281.8061397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.75601686671956, "units": "Tflops", "t": 1712077281.887109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.07065641987884, "units": "Tflops", "t": 1712077281.96858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 312.538}}, "t": 1712077281.9635062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.2316616841135, "units": "Tflops", "t": 1712077282.049456}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712077282.91829, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp16.D0.data new file mode 100644 index 000000000..a7ec90fd2 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp16.D0.data @@ -0,0 +1,296 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.941, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077574.944117, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077574.9599888}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 68, "power": 62.02}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 61.046}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26890.375, 81920.0], "load": 0.1, "temperature": 70, "power": 323.022}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 72, "power": 274.639}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 73, "power": 291.334}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 334.5882314734153, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 327.53991730891426, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 345.00244259034025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 71, "power": 263.132}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.8212055878198, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.2203070560282, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.546684591914, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0, "temperature": 69, "power": 95.548}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 297.4660125327079, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 224.81359756193774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.7061906920338, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 74, "power": 277.726}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.2537290564145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.2556021920928, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.2589462979041, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 74, "power": 302.244}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.78896120316256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.462740954117, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.189}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.1576098282291, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.2865708101574, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.2197622876041, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.98, "temperature": 73, "power": 95.84}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 345.76179508286754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 183.8068383929825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 304.35881918841017, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.91, "temperature": 74, "power": 350.705}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.2907130507503, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.5222026874448, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 249.855}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.49784052892926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.82937612894307, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.835593812176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 75, "power": 298.34}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.9509854020993, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.959630207359, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.41006871422036, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 76, "power": 272.186}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 344.2054064981413, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 142.31367694418424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 334.30059699757544, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 76, "power": 308.271}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077625.986699, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp32.D0.data new file mode 100644 index 000000000..b69ebf3ba --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-fp32.D0.data @@ -0,0 +1,171 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 99.212, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077488.432695, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077488.4483407}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 59.57}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 64, "power": 59.473}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214483737945557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2132.375, 81920.0], "load": 0, "temperature": 64, "power": 88.867}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33617639541626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 70, "power": 279.627}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254825592041016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179396152496338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 278.931}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268494129180908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 64.06826146510974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 72, "power": 306.34}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018351554870605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.05170496909889, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164165019989014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 282.359}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.183949280901075, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133245468139648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 74, "power": 294.444}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.144510718717946, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162621021270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.01739498552834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066978931427002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 263.852}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.977289260689965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074810981750488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 75, "power": 290.746}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.01431417771282, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0772786140441895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.870716543509964, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043660640716553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 320.1}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.74912338696986, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142033576965332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 298.518}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.77543474972034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11406946182251}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.78812267537285, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.118796348571777}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 281.462}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.675553964983436, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084875583648682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 305.24}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.6483561268729, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017924785614014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.663626523918744, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0746049880981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.346}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.656239792543204, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962323188781738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 307.075}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.64219096737577, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078864097595215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.59848530035381, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047895908355713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 274.15}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.56146824537856, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076254367828369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 306.121}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.57462530473498, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083705902099609}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.54465021830659, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153024196624756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 322.336}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.525081251915616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962716102600098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.674}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.56913394948169, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094931125640869}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.56819683146016, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002690315246582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 316.781}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.48043690998095, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089021682739258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 311.647}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.444693488809506, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033406734466553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.50061485770911, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93844747543335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.178}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.475665475643034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112069606781006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 79, "power": 298.131}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.428615750254686, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875514507293701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 54.17718731562482, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840810298919678}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.599}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 63.89109500902222, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938738822937012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 78, "power": 282.671}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 31.493411880743533, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 79, "power": 275.417}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077572.257265, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32-fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32-fp16.D0.data new file mode 100644 index 000000000..3340f63d2 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32-fp16.D0.data @@ -0,0 +1,295 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 72, "power": 103.999, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077687.954617, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077687.97126}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 61.242}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 66, "power": 60.338}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2206.375, 81920.0], "load": 0, "temperature": 66, "power": 90.804}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 71, "power": 307.676}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 72, "power": 282.728}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 331.4929569504289, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.82372249235414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 346.9176883316549, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 72, "power": 315.776}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.33944869938676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.31352898474483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.76039580787824, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.95, "temperature": 72, "power": 316.931}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.41370066668395, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 225.86195109685386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 73, "power": 300.075}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.7426827467468, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 310.1130353857362, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.289810350307, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 73, "power": 346.245}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.47967621401995, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.2666860525447, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.01726300753967, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 74, "power": 278.409}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.4962227634663, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.6834852262511, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.4366982044768, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 75, "power": 296.984}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 346.27779378253, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 203.5596768537521, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 321.7080049011913, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 74, "power": 144.08}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 345.79774450935724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 301.8809506292455, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 352.88}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.2189940989949, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.8209222407698, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.41093955709977, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 75, "power": 300.282}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.3416509374621, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.2316568080467, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.61995332478983, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 75, "power": 291.612}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.95434470196216, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 275.4416541767326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 222.08786703109882, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 301.761}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 75, "power": 301.761}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077739.0546155, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32.D0.data new file mode 100644 index 000000000..1223ea551 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/convnext_large-tf32.D0.data @@ -0,0 +1,200 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 102.443, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077628.706463, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077628.7231634}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 66, "power": 60.753}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 59.75}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2144575119018555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5832.375, 81920.0], "load": 0.02, "temperature": 65, "power": 91.0}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.336220741271973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2547926902771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179419040679932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268465518951416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 291.293}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018303871154785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1641645431518555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133294105529785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 330.126}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162642955780029}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.17739619499449, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066996097564697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074804782867432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.33050351039606, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077259063720703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 306.133}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.8132574719792, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0435991287231445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142058372497559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.57353773877497, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.114046573638916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.0459629547178, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1187896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 73, "power": 336.314}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084855556488037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.16311466571597, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017906188964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.4397259817078, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074584484100342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962344169616699}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.11}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.89702564161328, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078824996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.32629468860473, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047876834869385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762128829956055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.83456277495438, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 309.786}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083765983581543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.03157142999105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153017044067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962660312652588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.29697822799912, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094902038574219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 292.02}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.17432371590144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002725601196289}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089033126831055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.7771253290328, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033377170562744}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.80165999479628, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 75, "power": 287.422}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938430309295654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112060070037842}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.32260128648502, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875522613525391}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 146.5297707551696, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840785503387451}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 75, "power": 264.36}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.8519481533466, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938703536987305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9308247566223145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.05294691499935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798532009124756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.068528836604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888091087341309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 281.368}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874434947967529}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.4556437799436, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944365501403809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.5590262374041, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958274841308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953979015350342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 76, "power": 294.352}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.47920470380544, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947924613952637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.66047118859383, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85654878616333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895290374755859}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 76, "power": 352.312}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.88333581726906, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912010192871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.42080786133188, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968919277191162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984976768493652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.5718912138979, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878252029418945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 309.247}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.62590463625313, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996346950531006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023502349853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.34277192097105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968320369720459}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 77, "power": 307.577}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.53451214884157, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947745323181152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955850124359131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.43709916737896, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 77, "power": 286.517}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712077685.256696, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0.data new file mode 100644 index 000000000..714866f5d --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0.data @@ -0,0 +1,269 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.53, "memory": 0.010771942138671876}, "temperature": 73, "power": 106.843, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078655.279481, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712078655.2956676}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2326.375, 81920.0], "load": 0, "temperature": 68, "power": 93.377}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.224214553833008}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.226s, 39.67/s (3.226s, 39.67/s) LR: 1.000e-05 Data: 0.693 (0.693)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.176412582397461}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 72, "power": 281.211}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255987167358398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.163339138031006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 73, "power": 273.253}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.234711647033691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.243332386016846}, "pipe": "data"} +{"event": "data", "data": {"rate": 294.8439726928596, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048550128936768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 74, "power": 340.826}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.51176852556966, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.24672794342041}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.9046713698683, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.151210784912109}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.5703406410989, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 75, "power": 299.033}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.270848274230957}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.75355878822086, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.34615238221676, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.312984943389893}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.94670127782297, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 75, "power": 300.091}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311421871185303}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.417s, 307.07/s (0.509s, 251.35/s) LR: 1.000e-05 Data: 0.001 (0.029)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.718 (0.718) Loss: 7.1176 (7.1176) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.333 (0.179) Loss: 7.0505 (7.2336) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0/20240402-172421-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.7163250994223, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.93, "temperature": 75, "power": 237.218}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.97, "temperature": 76, "power": 306.856}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413616180419922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30446.375, 81920.0], "load": 0, "temperature": 70, "power": 343.453}}}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.950s, 134.69/s (0.950s, 134.69/s) LR: 1.008e-03 Data: 0.535 (0.535)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 274.2118566931733, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087185859680176}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.28555946930277, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0225982666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.99, "temperature": 75, "power": 252.528}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.6412672521491, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015655040740967}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.75531914562185, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.004358291625977}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.36553728745525, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.97, "temperature": 75, "power": 325.319}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06734561920166}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.4019411883425, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.8358524441842, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943554401397705}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.7898200130995, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032367706298828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 76, "power": 252.35}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.8785524880894, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978535175323486}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.6717255581839, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981788635253906}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.5677901320675, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 76, "power": 300.67}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017374038696289}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.15768337875835, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.415s, 308.21/s (0.439s, 291.68/s) LR: 1.008e-03 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.603 (0.603) Loss: 6.8701 (6.8701) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.168) Loss: 6.7062 (6.8681) Acc@1: 0.0000 ( 0.2665) Acc@5: 3.1250 ( 1.3081)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0/20240402-172421-davit_large-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.4648709014338, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.95, "temperature": 76, "power": 301.646}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 76, "power": 297.301}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.03, "temperature": 71, "power": 99.384}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885659694671631}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.930s, 137.70/s (0.930s, 137.70/s) LR: 2.006e-03 Data: 0.516 (0.516)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.915807247161865}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.7073308617536, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.96, "temperature": 75, "power": 303.977}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.830104999405, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9318437576293945}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.7788103103591, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9413652420043945}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.8196715959206, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995604515075684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.96, "temperature": 76, "power": 318.855}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.1788308935454, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912203788757324}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.54100221769824, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013670921325684}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.43472947405, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.95, "temperature": 76, "power": 307.78}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.5777511504807, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896271228790283}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.8248546557082, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970767974853516}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.0200383387749, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 77, "power": 312.726}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.092811584472656}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.67624334344293, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033717155456543}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.75036308948916, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.948 (6.96) Time: 0.415s, 308.07/s (0.439s, 291.73/s) LR: 2.006e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.570 (0.570) Loss: 6.7530 (6.7530) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.4966 (6.8226) Acc@1: 3.1250 ( 0.2422) Acc@5: 6.2500 ( 1.0417)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.36380818917365, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.94, "temperature": 76, "power": 303.77}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.94, "temperature": 77, "power": 283.326}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.261}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86574649810791}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.866 (6.87) Time: 0.954s, 134.22/s (0.954s, 134.22/s) LR: 3.004e-03 Data: 0.539 (0.539)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 276.79999920074, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.860692024230957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.97, "temperature": 76, "power": 314.448}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.0181789293082, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.905224323272705}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.36922695818373, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900017738342285}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.7294380982873, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 76, "power": 284.911}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911055088043213}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.01918877913823, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.1311786486939, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976555824279785}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.0030525205892, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897703170776367}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 77, "power": 301.18}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.03801670530396, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003688812255859}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.24990985699, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.037755966186523}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.1906413385024, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 77, "power": 311.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975908279418945}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.08575910635346, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.97039532206026, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015989303588867}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 6.998 (6.96) Time: 0.418s, 306.53/s (0.440s, 290.99/s) LR: 3.004e-03 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.595 (0.595) Loss: 6.8181 (6.8181) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.3763 (6.8167) Acc@1: 0.0000 ( 0.1938) Acc@5: 9.3750 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.5569831126179, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0, "temperature": 73, "power": 101.877}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.94, "temperature": 77, "power": 214.339}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.928}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8576154708862305}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.858 (6.86) Time: 0.942s, 135.91/s (0.942s, 135.91/s) LR: 4.002e-03 Data: 0.527 (0.527)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33942.375, 81920.0], "load": 0, "temperature": 74, "power": 297.878}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.830388069152832}, "pipe": "data"} +{"event": "data", "data": {"rate": 286.5000055845118, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 263.89076398509974, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881205081939697}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.99, "temperature": 76, "power": 310.986}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.82670597304406, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.920263290405273}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.0574188767639, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918469429016113}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.6952405480592, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 271.907}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993577003479004}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.3115977402665, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.038257598876953}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.79523726581255, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.60850811477786, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0744194984436035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 311.459}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.12726899709185, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013601303100586}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.09323426975715, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049444675445557}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.12720537247503, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 77, "power": 290.782}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013031482696533}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.40695289730274, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.093 (6.97) Time: 0.417s, 307.27/s (0.439s, 291.56/s) LR: 4.002e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.589 (0.589) Loss: 6.8217 (6.8217) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.7427 (6.8290) Acc@1: 3.1250 ( 0.3634) Acc@5: 3.1250 ( 1.1628)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0/20240402-172421-davit_large-224/checkpoint-4.pth.tar', 0.3633720930232558)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 306.9712804876606, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.97, "temperature": 77, "power": 287.721}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.93, "temperature": 77, "power": 291.642}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.03, "temperature": 72, "power": 99.871}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.820207595825195}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 6.820 (6.82) Time: 0.953s, 134.34/s (0.953s, 134.34/s) LR: 4.997e-03 Data: 0.539 (0.539)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.873222351074219}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.7752899390563, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712078776.5063932, "return_code": -15}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large.D0.data new file mode 100644 index 000000000..1ee42fced --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/davit_large.D0.data @@ -0,0 +1,255 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.23, "memory": 0.010771942138671876}, "temperature": 70, "power": 101.456, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078543.67927, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712078543.6959698}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2250.375, 81920.0], "load": 0, "temperature": 66, "power": 90.804}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2242937088012695}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.254s, 39.33/s (3.254s, 39.33/s) LR: 1.000e-05 Data: 0.707 (0.707)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.176398277282715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 70, "power": 294.838}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255929470062256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.163320541381836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 71, "power": 275.042}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.234607696533203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.243466377258301}, "pipe": "data"} +{"event": "data", "data": {"rate": 300.5270940005941, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048627853393555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.98, "temperature": 71, "power": 278.944}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.30462812791, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.246768951416016}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.2403670135786, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.151220321655273}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.9808910436719, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 73, "power": 275.41}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.270956993103027}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.4637261666249, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.2228758698495, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.3129563331604}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.27961934508033, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.95, "temperature": 73, "power": 298.615}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311375617980957}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.416s, 307.55/s (0.509s, 251.57/s) LR: 1.000e-05 Data: 0.001 (0.029)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.723 (0.723) Loss: 7.1174 (7.1174) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.339 (0.179) Loss: 7.0508 (7.2335) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0/20240402-172229-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 308.49873079042874, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.94, "temperature": 74, "power": 306.482}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.97, "temperature": 74, "power": 296.396}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413697719573975}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31890.375, 81920.0], "load": 0, "temperature": 69, "power": 313.697}}}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.953s, 134.28/s (0.953s, 134.28/s) LR: 1.008e-03 Data: 0.539 (0.539)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.087247848510742}, "pipe": "data"} +{"event": "data", "data": {"rate": 285.3636305029402, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.9915872373749, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 74, "power": 337.603}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.24407199088466, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015710353851318}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.388256161822, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.004858016967773}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.53774873595506, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 74, "power": 281.279}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.067661285400391}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.0976480936771, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943252086639404}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.91200463468505, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.4420175081399, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032105445861816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.95, "temperature": 75, "power": 295.13}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.00961842346175, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978611946105957}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.1157522660887, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981616973876953}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.53203742029007, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.98, "temperature": 75, "power": 289.09}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01680326461792}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.15100375757925, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.415s, 308.57/s (0.438s, 292.34/s) LR: 1.008e-03 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.568 (0.568) Loss: 6.8692 (6.8692) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.7062 (6.8682) Acc@1: 0.0000 ( 0.2907) Acc@5: 3.1250 ( 1.2839)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0/20240402-172229-davit_large-224/checkpoint-1.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 308.3962937718861, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.96, "temperature": 75, "power": 325.384}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 75, "power": 309.057}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.01, "temperature": 70, "power": 96.438}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885842323303223}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.929s, 137.86/s (0.929s, 137.86/s) LR: 2.006e-03 Data: 0.515 (0.515)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 224.5434368823345, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914949417114258}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.4823578176238, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 75, "power": 288.392}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931748867034912}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.70201361061345, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.940550327301025}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.3401215247171, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.03594899808564, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992883205413818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.98, "temperature": 74, "power": 290.365}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.6667464560313, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.31696800164144, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011209487915039}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.79546016808393, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 1.0, "temperature": 75, "power": 261.956}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89418888092041}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.24362302591646, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.969819068908691}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.51021490541086, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.88291211638676, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.095127105712891}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 76, "power": 202.157}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.5853286828446, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.027336120605469}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.948 (6.96) Time: 0.417s, 307.15/s (0.439s, 291.84/s) LR: 2.006e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 276.3652362514106, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.578 (0.578) Loss: 6.7616 (6.7616) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.4838 (6.8220) Acc@1: 3.1250 ( 0.2665) Acc@5: 6.2500 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.1623750663534, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.96, "temperature": 76, "power": 303.368}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.96, "temperature": 76, "power": 319.212}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.03, "temperature": 71, "power": 99.384}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.863471984863281}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.863 (6.86) Time: 0.943s, 135.77/s (0.943s, 135.77/s) LR: 3.004e-03 Data: 0.529 (0.529)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 248.52663798014294, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.857139587402344}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.7562799120878, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.99, "temperature": 75, "power": 270.234}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.905974388122559}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.0472700194181, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.6535054306071, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.19799903385604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914050102233887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 1.0, "temperature": 76, "power": 337.009}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.5052933049932, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9702558517456055}, "pipe": "data"} +{"event": "data", "data": {"rate": 302.962982226593, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908979415893555}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.30194071942975, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 76, "power": 196.336}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006004810333252}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.67930274953716, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.00698230023255, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034702301025391}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.4957832212892, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.99, "temperature": 77, "power": 329.645}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979143142700195}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.89150292322813, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01246976852417}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 6.999 (6.96) Time: 0.418s, 306.55/s (0.440s, 291.14/s) LR: 3.004e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 306.7599739224944, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.583 (0.583) Loss: 6.8222 (6.8222) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.5625 ( 1.5625)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.3795 (6.8159) Acc@1: 0.0000 ( 0.1696) Acc@5: 6.2500 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 306.5910363746384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.69, "temperature": 76, "power": 325.042}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.94, "temperature": 77, "power": 232.118}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.469}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859582901000977}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.860 (6.86) Time: 0.931s, 137.45/s (0.931s, 137.45/s) LR: 4.002e-03 Data: 0.516 (0.516)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 292.9241407052597, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827597618103027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.95, "temperature": 76, "power": 309.68}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.44004119255453, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.876450061798096}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.88169537397914, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914443016052246}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.23844681931166, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 290.365}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.6354645354039, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918074607849121}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.6354173339756, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9963908195495605}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.21656186972285, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 308.465}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.041259288787842}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.067883017303, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.073038101196289}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.5645263834793, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011545658111572}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.4469791712168, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.98, "temperature": 77, "power": 313.691}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.07464131419607, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.041287422180176}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.56948214952433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712078652.640405, "return_code": -15}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/dlrm.0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/dlrm.0.data new file mode 100644 index 000000000..852327c08 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/dlrm.0.data @@ -0,0 +1,282 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 74, "power": 106.635, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712079069.549035, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712079069.565685}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"} +{"event": "line", "data": "Using 1 GPU(s)...\n", "pipe": "stdout"} +{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 68, "power": 62.248}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 61.265}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3298.375, 81920.0], "load": 0, "temperature": 67, "power": 92.45}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08395528793334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5700.375, 81920.0], "load": 0, "temperature": 66, "power": 91.645}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08342313766479492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5702.375, 81920.0], "load": 0, "temperature": 66, "power": 90.754}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5702.375, 81920.0], "load": 0, "temperature": 65, "power": 90.071}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0849374458193779}, "pipe": "data"} +{"event": "data", "data": {"rate": 420915.74093591835, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6106.375, 81920.0], "load": 0, "temperature": 64, "power": 89.168}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08372959494590759}, "pipe": "data"} +{"event": "data", "data": {"rate": 415392.1765862972, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 64, "power": 88.375}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08384181559085846}, "pipe": "data"} +{"event": "data", "data": {"rate": 416500.3698779529, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 63, "power": 88.07}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08388040214776993}, "pipe": "data"} +{"event": "data", "data": {"rate": 419148.13156482705, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 63, "power": 87.289}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08414746820926666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 62, "power": 86.582}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 417720.52885295433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 62, "power": 85.997}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08342362195253372}, "pipe": "data"} +{"event": "data", "data": {"rate": 419205.0997282894, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 61, "power": 85.606}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08308812975883484}, "pipe": "data"} +{"event": "data", "data": {"rate": 419477.907718752, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 61, "power": 85.227}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08345107734203339}, "pipe": "data"} +{"event": "data", "data": {"rate": 421598.5036958574, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 61, "power": 84.532}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08459055423736572}, "pipe": "data"} +{"event": "data", "data": {"rate": 419196.89458612987, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0.18, "temperature": 60, "power": 83.543}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 60, "power": 82.847}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08355697244405746}, "pipe": "data"} +{"event": "data", "data": {"rate": 418015.00787158695, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 59, "power": 82.273}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08368614315986633}, "pipe": "data"} +{"event": "data", "data": {"rate": 417431.579972925, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 59, "power": 81.664}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08400876820087433}, "pipe": "data"} +{"event": "data", "data": {"rate": 419955.515063751, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 58, "power": 81.381}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08406656980514526}, "pipe": "data"} +{"event": "data", "data": {"rate": 418416.9339339734, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0.18, "temperature": 58, "power": 80.99}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 58, "power": 80.502}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08438296616077423}, "pipe": "data"} +{"event": "data", "data": {"rate": 419896.53486945486, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 57, "power": 80.272}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08476312458515167}, "pipe": "data"} +{"event": "data", "data": {"rate": 419037.27464949555, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 57, "power": 79.5}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08354128897190094}, "pipe": "data"} +{"event": "data", "data": {"rate": 418406.22724529885, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 56, "power": 79.306}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08408722281455994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 56, "power": 79.874}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 414229.6689623833, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 56, "power": 78.512}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08347434550523758}, "pipe": "data"} +{"event": "data", "data": {"rate": 416981.4260530626, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 77.545}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08404223620891571}, "pipe": "data"} +{"event": "data", "data": {"rate": 419046.8561344672, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 77.141}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08309965580701828}, "pipe": "data"} +{"event": "data", "data": {"rate": 418063.53958507243, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 77.044}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08461501449346542}, "pipe": "data"} +{"event": "data", "data": {"rate": 417284.8534792512, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 55, "power": 78.114}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.458}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08360610902309418}, "pipe": "data"} +{"event": "data", "data": {"rate": 418065.08638540324, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.36}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08310449123382568}, "pipe": "data"} +{"event": "data", "data": {"rate": 418999.7996043439, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.751}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08348263800144196}, "pipe": "data"} +{"event": "data", "data": {"rate": 419633.91909368907, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 54, "power": 76.054}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08347773551940918}, "pipe": "data"} +{"event": "data", "data": {"rate": 418704.8521539214, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0.18, "temperature": 54, "power": 76.054}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 53, "power": 75.86}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0839482992887497}, "pipe": "data"} +{"event": "data", "data": {"rate": 418186.6259914831, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.665}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08302360773086548}, "pipe": "data"} +{"event": "data", "data": {"rate": 420836.14624949946, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.469}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08247873932123184}, "pipe": "data"} +{"event": "data", "data": {"rate": 416391.08882406726, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.273}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08412615209817886}, "pipe": "data"} +{"event": "data", "data": {"rate": 419035.20622087223, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 53, "power": 75.077}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.663}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08320017158985138}, "pipe": "data"} +{"event": "data", "data": {"rate": 418117.43899087526, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.467}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08296072483062744}, "pipe": "data"} +{"event": "data", "data": {"rate": 418482.41479882965, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.663}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08382801711559296}, "pipe": "data"} +{"event": "data", "data": {"rate": 412173.5083427148, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 52, "power": 74.076}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08423185348510742}, "pipe": "data"} +{"event": "data", "data": {"rate": 417467.37090164266, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 51, "power": 74.076}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 51, "power": 73.686}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08362126350402832}, "pipe": "data"} +{"event": "data", "data": {"rate": 415766.4188528958, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 51, "power": 73.588}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08386819064617157}, "pipe": "data"} +{"event": "data", "data": {"rate": 419603.59087861085, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 51, "power": 73.283}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08274102210998535}, "pipe": "data"} +{"event": "data", "data": {"rate": 418848.9491976157, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 73.185}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08267556130886078}, "pipe": "data"} +{"event": "data", "data": {"rate": 417535.1830858903, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 73.087}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0836000144481659}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 72.891}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 417403.8874303701, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 72.696}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08373723179101944}, "pipe": "data"} +{"event": "data", "data": {"rate": 411420.2259345911, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 72.598}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0839206874370575}, "pipe": "data"} +{"event": "data", "data": {"rate": 419378.19043481385, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 72.403}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08366947621107101}, "pipe": "data"} +{"event": "data", "data": {"rate": 418089.8580852362, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 72.403}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08206789195537567}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.15, "temperature": 50, "power": 72.794}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 422089.82794944674, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 49, "power": 71.988}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08495134860277176}, "pipe": "data"} +{"event": "data", "data": {"rate": 415339.64079712157, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.988}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0824621394276619}, "pipe": "data"} +{"event": "data", "data": {"rate": 417080.48446708685, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.792}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08413124084472656}, "pipe": "data"} +{"event": "data", "data": {"rate": 416174.4852171312, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.89}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08249908685684204}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 50, "power": 209.78}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 414542.654247997, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.207}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08293187618255615}, "pipe": "data"} +{"event": "data", "data": {"rate": 416572.51790449483, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 48, "power": 71.207}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08295343071222305}, "pipe": "data"} +{"event": "data", "data": {"rate": 420874.56676622445, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 71.128}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08349806070327759}, "pipe": "data"} +{"event": "data", "data": {"rate": 419082.21710658615, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 71.097}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08397036790847778}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.92}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 421359.3493927722, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.803}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08261168003082275}, "pipe": "data"} +{"event": "data", "data": {"rate": 417800.16381520097, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.725}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08328827470541}, "pipe": "data"} +{"event": "data", "data": {"rate": 417528.0433637939, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.607}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08417266607284546}, "pipe": "data"} +{"event": "data", "data": {"rate": 422818.51170877466, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 47, "power": 70.548}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08396507054567337}, "pipe": "data"} +{"event": "data", "data": {"rate": 420473.54036026297, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.06, "temperature": 47, "power": 70.51}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.451}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08414334058761597}, "pipe": "data"} +{"event": "data", "data": {"rate": 418449.20690926455, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.608}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0827147513628006}, "pipe": "data"} +{"event": "data", "data": {"rate": 418946.26167812065, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.236}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08464237302541733}, "pipe": "data"} +{"event": "data", "data": {"rate": 419338.5722330398, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.236}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08371055126190186}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.11, "temperature": 46, "power": 70.236}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 419383.3646458749, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 70.282}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08391377329826355}, "pipe": "data"} +{"event": "data", "data": {"rate": 418974.1056920486, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 69.943}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08393114805221558}, "pipe": "data"} +{"event": "data", "data": {"rate": 415050.5790961286, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 69.864}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08339859545230865}, "pipe": "data"} +{"event": "data", "data": {"rate": 417903.3957023033, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 70.158}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 69.253}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712079320.690505, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/focalnet.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/focalnet.D0.data new file mode 100644 index 000000000..2383a1ad9 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/focalnet.D0.data @@ -0,0 +1,261 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.52, "memory": 0.010771942138671876}, "temperature": 72, "power": 104.302, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078779.133667, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712078779.1497157}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004467010498047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4374.375, 81920.0], "load": 0.99, "temperature": 73, "power": 263.955}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 11.523s, 11.11/s (11.523s, 11.11/s) LR: 1.000e-05 Data: 0.678 (0.678)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [21896.375, 81920.0], "load": 1.0, "temperature": 70, "power": 154.99}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10120.375, 81920.0], "load": 0.99, "temperature": 71, "power": 228.174}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7748.375, 81920.0], "load": 1.0, "temperature": 71, "power": 193.198}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006728649139404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935497760772705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23768.375, 81920.0], "load": 0.99, "temperature": 73, "power": 302.517}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995425701141357}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.060293197631836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23768.375, 81920.0], "load": 0.96, "temperature": 74, "power": 318.382}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056240081787109}, "pipe": "data"} +{"event": "data", "data": {"rate": 368.98203453859213, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029513359069824}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.8628346287947, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975472450256348}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.85075726776165, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23768.375, 81920.0], "load": 0.97, "temperature": 75, "power": 308.802}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 372.5438146282206, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.069397926330566}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.324s, 395.46/s (0.681s, 188.06/s) LR: 1.000e-05 Data: 0.000 (0.028)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 362.3881519639836, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.687 (0.687) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 1.007 (0.159) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0/20240402-172624-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24012.375, 81920.0], "load": 0.85, "temperature": 74, "power": 237.08}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.5034862581757, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24012.375, 81920.0], "load": 0.94, "temperature": 75, "power": 278.374}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020679950714111}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 0.857s, 149.30/s (0.857s, 149.30/s) LR: 1.001e-02 Data: 0.531 (0.531)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23740.375, 81920.0], "load": 0.62, "temperature": 74, "power": 291.467}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 391.494881363471, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.986495018005371}, "pipe": "data"} +{"event": "data", "data": {"rate": 392.56737588635076, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 375.91260199597957, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.065654754638672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23840.375, 81920.0], "load": 0.95, "temperature": 75, "power": 282.157}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 365.97239663491354, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077932357788086}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.49319210482963, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056328773498535}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.28729500287966, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23840.375, 81920.0], "load": 0.99, "temperature": 76, "power": 293.64}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036379337310791}, "pipe": "data"} +{"event": "data", "data": {"rate": 384.4103988482728, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007882595062256}, "pipe": "data"} +{"event": "data", "data": {"rate": 368.79133166424015, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 392.34255392418385, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.197965621948242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23840.375, 81920.0], "load": 0.99, "temperature": 76, "power": 270.557}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 372.53832206644216, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10841703414917}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.326s, 393.10/s (0.347s, 368.83/s) LR: 1.001e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.564 (0.564) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.128) Loss: 6.9393 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0/20240402-172624-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 394.5168048994647, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24084.375, 81920.0], "load": 0.97, "temperature": 75, "power": 298.633}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24084.375, 81920.0], "load": 0.03, "temperature": 72, "power": 100.47}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995872497558594}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 0.856s, 149.57/s (0.856s, 149.57/s) LR: 2.001e-02 Data: 0.532 (0.532)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 332.56051552387936, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097558975219727}, "pipe": "data"} +{"event": "data", "data": {"rate": 354.0308306531893, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24330.375, 81920.0], "load": 0.99, "temperature": 75, "power": 182.324}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.1809499198877, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0096845626831055}, "pipe": "data"} +{"event": "data", "data": {"rate": 390.19721597659975, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.010942459106445}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.4053620304631, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24330.375, 81920.0], "load": 0.99, "temperature": 76, "power": 261.623}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231774806976318}, "pipe": "data"} +{"event": "data", "data": {"rate": 386.3811518975614, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112374305725098}, "pipe": "data"} +{"event": "data", "data": {"rate": 391.83570272658295, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.966992764963, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.235927104949951}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24330.375, 81920.0], "load": 0.99, "temperature": 76, "power": 254.494}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 354.64402026936494, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.224554061889648}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.2725910726848, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 7.261 (7.13) Time: 0.323s, 396.27/s (0.347s, 368.57/s) LR: 2.001e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.554 (0.554) Loss: 6.9301 (6.9301) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.7457 (7.1150) Acc@1: 0.0000 ( 0.1696) Acc@5: 0.0000 ( 0.8479)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 395.8798593328756, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24500.375, 81920.0], "load": 0, "temperature": 72, "power": 100.83}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24574.375, 81920.0], "load": 0.94, "temperature": 77, "power": 300.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089291095733643}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 7.089 (7.09) Time: 0.846s, 151.34/s (0.846s, 151.34/s) LR: 3.000e-02 Data: 0.523 (0.523)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.99, "temperature": 76, "power": 292.251}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 344.9315660690889, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231501579284668}, "pipe": "data"} +{"event": "data", "data": {"rate": 387.90893584782026, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0426130294799805}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.4316636815804, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.95, "temperature": 76, "power": 284.3}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.9493831303574, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.210793495178223}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.230088710784912}, "pipe": "data"} +{"event": "data", "data": {"rate": 382.9996940381012, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.394797325134277}, "pipe": "data"} +{"event": "data", "data": {"rate": 369.8277081344691, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.99, "temperature": 77, "power": 300.569}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.3392875614293, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.26943826675415}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.21333522997116, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.293475151062012}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.74547905583785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24818.375, 81920.0], "load": 0.97, "temperature": 77, "power": 277.306}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.378293037414551}, "pipe": "data"} +{"event": "data", "data": {"rate": 355.4388159548853, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.239 (7.23) Time: 0.324s, 395.27/s (0.347s, 368.59/s) LR: 3.000e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.552 (0.552) Loss: 7.1712 (7.1712) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.2444 (7.1748) Acc@1: 0.0000 ( 0.2180) Acc@5: 25.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 394.31335268558576, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25062.375, 81920.0], "load": 0.93, "temperature": 77, "power": 302.752}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25062.375, 81920.0], "load": 0, "temperature": 72, "power": 100.538}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.215520858764648}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 7.216 (7.22) Time: 0.856s, 149.47/s (0.856s, 149.47/s) LR: 4.000e-02 Data: 0.531 (0.531)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 391.40477399501464, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.295672416687012}, "pipe": "data"} +{"event": "data", "data": {"rate": 392.74346313024085, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25306.375, 81920.0], "load": 0.96, "temperature": 77, "power": 282.309}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311964988708496}, "pipe": "data"} +{"event": "data", "data": {"rate": 384.63908900090854, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.187459945678711}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.23209789037753, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 357.8944851583026, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.408051490783691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25306.375, 81920.0], "load": 0.96, "temperature": 77, "power": 326.429}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.0250688091481, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.321699142456055}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.6484191984159, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.464118003845215}, "pipe": "data"} +{"event": "data", "data": {"rate": 384.6007968273487, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25306.375, 81920.0], "load": 0.96, "temperature": 77, "power": 304.821}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.423360824584961}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.5561659739286, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.404 (7.33) Time: 0.323s, 395.67/s (0.348s, 368.04/s) LR: 4.000e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 384.2803640365545, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.565 (0.565) Loss: 6.9695 (6.9695) Acc@1: 0.0000 ( 0.0000) Acc@5: 4.6875 ( 4.6875)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.032 (0.128) Loss: 6.8203 (7.2576) Acc@1: 0.0000 ( 0.1211) Acc@5: 3.1250 ( 0.7025)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 395.6080105541047, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25550.375, 81920.0], "load": 0.95, "temperature": 77, "power": 336.296}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25550.375, 81920.0], "load": 0.94, "temperature": 77, "power": 305.174}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.211348533630371}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 7.211 (7.21) Time: 0.862s, 148.43/s (0.862s, 148.43/s) LR: 4.997e-02 Data: 0.539 (0.539)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.94, "temperature": 77, "power": 297.928}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.3252794814747, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.400300025939941}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.0778671168573, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.457738399505615}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.0807654349073, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.70676326751709}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.96, "temperature": 77, "power": 308.062}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.78057517340005, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.551183223724365}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.9927983163714, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.98387457628, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.540083885192871}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.96, "temperature": 77, "power": 289.287}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.4815756110184, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.490669250488281}, "pipe": "data"} +{"event": "data", "data": {"rate": 383.8043983102495, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.45649528503418}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.53194665432846, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25794.375, 81920.0], "load": 0.95, "temperature": 78, "power": 307.089}}}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 31/32 (100%)] Loss: 7.408 (7.45) Time: 0.324s, 394.75/s (0.348s, 368.20/s) LR: 4.997e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 365.4343842059815, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.543 (0.543) Loss: 7.3016 (7.3016) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.8042 (7.2021) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 394.872320086785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26038.375, 81920.0], "load": 0.93, "temperature": 77, "power": 304.187}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26038.375, 81920.0], "load": 0, "temperature": 73, "power": 101.22}}}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712078894.5490353, "return_code": -15}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp16.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp16.D0.data new file mode 100644 index 000000000..9f434d4b2 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp16.D0.data @@ -0,0 +1,141 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 101.359, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077244.530782, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712077244.5408697}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 222.54486758004043, "units": "Tflops", "t": 1712077246.5508718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 67, "power": 61.167}}, "t": 1712077245.8868654}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0.13, "temperature": 69, "power": 303.48}}, "t": 1712077246.4000347}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5317042014051, "units": "Tflops", "t": 1712077246.8107204}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.163936526009, "units": "Tflops", "t": 1712077247.070349}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 243.726}}, "t": 1712077246.9122415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.03918272948692, "units": "Tflops", "t": 1712077247.3332746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.60945854785086, "units": "Tflops", "t": 1712077247.5965824}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 247.759}}, "t": 1712077247.419552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.00843885729108, "units": "Tflops", "t": 1712077247.857433}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.127405803192, "units": "Tflops", "t": 1712077248.1181223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 297.768}}, "t": 1712077247.92666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.47950202942943, "units": "Tflops", "t": 1712077248.3774605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.96494904260413, "units": "Tflops", "t": 1712077248.6372814}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 276.781}}, "t": 1712077248.4408026}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.09079704420296, "units": "Tflops", "t": 1712077248.8990853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.89767339288477, "units": "Tflops", "t": 1712077249.1600091}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 251.404}}, "t": 1712077248.9483213}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.94265082627828, "units": "Tflops", "t": 1712077249.423001}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.9265694321592, "units": "Tflops", "t": 1712077249.6838865}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 250.437}}, "t": 1712077249.4564295}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.56846925194102, "units": "Tflops", "t": 1712077249.9431415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.40439657648997, "units": "Tflops", "t": 1712077250.2025096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 287.727}}, "t": 1712077249.9643905}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.68210992763017, "units": "Tflops", "t": 1712077250.4616623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.38350557635073, "units": "Tflops", "t": 1712077250.7231083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 292.192}}, "t": 1712077250.4718866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.82913875918806, "units": "Tflops", "t": 1712077250.9862187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 259.748}}, "t": 1712077250.9802568}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.80981327178375, "units": "Tflops", "t": 1712077251.249353}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.00765951103918, "units": "Tflops", "t": 1712077251.5081162}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 235.4}}, "t": 1712077251.488796}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.9757852064729, "units": "Tflops", "t": 1712077251.7700298}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.13458442981218, "units": "Tflops", "t": 1712077252.0307195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.642}}, "t": 1712077251.9969237}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.5819602813517, "units": "Tflops", "t": 1712077252.2930512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.84712398130065, "units": "Tflops", "t": 1712077252.5550547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 298.546}}, "t": 1712077252.505392}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.35082863378213, "units": "Tflops", "t": 1712077252.815552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.3862680424351, "units": "Tflops", "t": 1712077253.0769968}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 266.222}}, "t": 1712077253.0137498}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 257.74672471670175, "units": "Tflops", "t": 1712077253.3330739}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.89107880375622, "units": "Tflops", "t": 1712077253.592973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 240.606}}, "t": 1712077253.521845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.19643361174224, "units": "Tflops", "t": 1712077253.855737}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.8511961895104, "units": "Tflops", "t": 1712077254.118787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.795}}, "t": 1712077254.030125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.38151241243773, "units": "Tflops", "t": 1712077254.3813171}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.09776933317048, "units": "Tflops", "t": 1712077254.6420221}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 300.197}}, "t": 1712077254.5415034}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.3529480326505, "units": "Tflops", "t": 1712077254.9014895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.41161961641615, "units": "Tflops", "t": 1712077255.1618736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 260.432}}, "t": 1712077255.0513716}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.18867416543696, "units": "Tflops", "t": 1712077255.4235737}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.58721666931706, "units": "Tflops", "t": 1712077255.686902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 229.698}}, "t": 1712077255.5596206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.21102731285686, "units": "Tflops", "t": 1712077255.947558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.4267911604863, "units": "Tflops", "t": 1712077256.2089658}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.812}}, "t": 1712077256.0671718}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.88151891581694, "units": "Tflops", "t": 1712077256.4679122}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.43752719153935, "units": "Tflops", "t": 1712077256.7313833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 280.685}}, "t": 1712077256.576084}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.58721666931706, "units": "Tflops", "t": 1712077256.9947474}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.92148325043715, "units": "Tflops", "t": 1712077257.255644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 238.083}}, "t": 1712077257.0844693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.40396113113053, "units": "Tflops", "t": 1712077257.5160992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.48704944225506, "units": "Tflops", "t": 1712077257.774371}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.596}}, "t": 1712077257.5939236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.82512025180884, "units": "Tflops", "t": 1712077258.036483}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.4776599018352, "units": "Tflops", "t": 1712077258.3009775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.026}}, "t": 1712077258.1029863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.67303299353517, "units": "Tflops", "t": 1712077258.5632377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.13442134102067, "units": "Tflops", "t": 1712077258.825992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 250.509}}, "t": 1712077258.61151}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.54468068017985, "units": "Tflops", "t": 1712077259.0883808}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.59206781672756, "units": "Tflops", "t": 1712077259.3465555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 226.252}}, "t": 1712077259.1199312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.7154338482392, "units": "Tflops", "t": 1712077259.6077058}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.2793910091135, "units": "Tflops", "t": 1712077259.8661869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 241.527}}, "t": 1712077259.6283953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.17650647682925, "units": "Tflops", "t": 1712077260.126893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.52999965834513, "units": "Tflops", "t": 1712077260.3881924}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 246.911}}, "t": 1712077260.1375098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.89668767496119, "units": "Tflops", "t": 1712077260.6512637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.33036598129544, "units": "Tflops", "t": 1712077260.9137995}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 236.858}}, "t": 1712077260.6518612}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.9007725346199, "units": "Tflops", "t": 1712077261.1727145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 249.935}}, "t": 1712077261.162053}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.4427080558217, "units": "Tflops", "t": 1712077261.4310744}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.6406738873113, "units": "Tflops", "t": 1712077261.692263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.801}}, "t": 1712077261.6722648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.36895220888815, "units": "Tflops", "t": 1712077261.9548306}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.42239885733068, "units": "Tflops", "t": 1712077262.2172825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 255.828}}, "t": 1712077262.1897674}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.65083080408175, "units": "Tflops", "t": 1712077262.479536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.41677076431165, "units": "Tflops", "t": 1712077262.7378848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 73, "power": 243.536}}, "t": 1712077262.6980882}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.99964807470843, "units": "Tflops", "t": 1712077262.998758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.71531473646385, "units": "Tflops", "t": 1712077263.2588427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 262.094}}, "t": 1712077263.206447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.99432265464102, "units": "Tflops", "t": 1712077263.5186732}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.96489396531246, "units": "Tflops", "t": 1712077263.7774792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 282.554}}, "t": 1712077263.71479}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.8442622040078, "units": "Tflops", "t": 1712077264.0374675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.42610031650585, "units": "Tflops", "t": 1712077264.2988775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.512}}, "t": 1712077264.2248616}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.52746451045132, "units": "Tflops", "t": 1712077264.5602367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.48967379569982, "units": "Tflops", "t": 1712077264.8215787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.308}}, "t": 1712077264.7338543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.528616844093, "units": "Tflops", "t": 1712077265.082918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.48253169992324, "units": "Tflops", "t": 1712077265.3442695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.318}}, "t": 1712077265.2411833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.467327293404, "units": "Tflops", "t": 1712077265.605692}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.5028070908945, "units": "Tflops", "t": 1712077265.8670201}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.512}}, "t": 1712077265.748522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.4479788787993, "units": "Tflops", "t": 1712077266.1284456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.43001514906106, "units": "Tflops", "t": 1712077266.3898485}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.413}}, "t": 1712077266.2559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.44176037533407, "units": "Tflops", "t": 1712077266.6512785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.42610031650585, "units": "Tflops", "t": 1712077266.912686}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.319}}, "t": 1712077266.76545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.43945730364706, "units": "Tflops", "t": 1712077267.1741214}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.44360286294048, "units": "Tflops", "t": 1712077267.4355028}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 265.935}}, "t": 1712077267.2728167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.40675822019162, "units": "Tflops", "t": 1712077267.6969702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.356114393945, "units": "Tflops", "t": 1712077267.9584403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.704}}, "t": 1712077267.7816741}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.41850128228725, "units": "Tflops", "t": 1712077268.2198966}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.44452411682946, "units": "Tflops", "t": 1712077268.481275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.702}}, "t": 1712077268.2890494}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.4201131604045, "units": "Tflops", "t": 1712077268.742742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.35979689603613, "units": "Tflops", "t": 1712077269.0042107}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 266.32}}, "t": 1712077268.7991788}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.3176846923445, "units": "Tflops", "t": 1712077269.265771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.4079094524799, "units": "Tflops", "t": 1712077269.5271878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 267.004}}, "t": 1712077269.3065293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.38557742024466, "units": "Tflops", "t": 1712077269.7886782}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712077270.259918, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp32.D0.data new file mode 100644 index 000000000..c95000a56 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/fp32.D0.data @@ -0,0 +1,301 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 99.9, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077305.368661, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712077305.3792894}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 18.68710130815531, "units": "Tflops", "t": 1712077308.2750032}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 67, "power": 63.246}}, "t": 1712077306.7332208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0.24, "temperature": 72, "power": 236.778}}, "t": 1712077307.2411919}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.146}}, "t": 1712077307.7487762}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.015}}, "t": 1712077308.255338}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.994057850676803, "units": "Tflops", "t": 1712077309.433464}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 282.447}}, "t": 1712077308.7629755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 283.422}}, "t": 1712077309.2695653}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.994527244925973, "units": "Tflops", "t": 1712077310.591394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 293.064}}, "t": 1712077309.8030024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.106}}, "t": 1712077310.3131642}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.994984926656688, "units": "Tflops", "t": 1712077311.7492683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 287.903}}, "t": 1712077310.8223195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.518}}, "t": 1712077311.3314729}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.99354545510755, "units": "Tflops", "t": 1712077312.9072163}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 284.103}}, "t": 1712077311.8417897}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 287.236}}, "t": 1712077312.348321}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.354}}, "t": 1712077312.8566823}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.9994964231482, "units": "Tflops", "t": 1712077314.064821}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 288.304}}, "t": 1712077313.3668628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 304.265}}, "t": 1712077313.877035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.002005476128602, "units": "Tflops", "t": 1712077315.2222967}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 289.368}}, "t": 1712077314.3891506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 302.015}}, "t": 1712077314.8957286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.002326494834936, "units": "Tflops", "t": 1712077316.3796988}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 301.92}}, "t": 1712077315.4022624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.816}}, "t": 1712077315.9100757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.999977828117547, "units": "Tflops", "t": 1712077317.537271}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 291.218}}, "t": 1712077316.4191806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 303.763}}, "t": 1712077316.926771}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.313}}, "t": 1712077317.4343998}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.99907765980432, "units": "Tflops", "t": 1712077318.6949356}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 298.623}}, "t": 1712077317.9418545}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.472}}, "t": 1712077318.4494684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.000169614128104, "units": "Tflops", "t": 1712077319.852467}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 304.26}}, "t": 1712077318.9560473}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.753}}, "t": 1712077319.4626508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.98960758148448, "units": "Tflops", "t": 1712077321.0106518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 301.829}}, "t": 1712077319.9703536}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 303.276}}, "t": 1712077320.4795644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 270.131}}, "t": 1712077320.991699}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.014223576345586, "units": "Tflops", "t": 1712077322.1673636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.707}}, "t": 1712077321.5008812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.288}}, "t": 1712077322.0100975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.976882256425043, "units": "Tflops", "t": 1712077323.3263178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.902}}, "t": 1712077322.5178225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.479}}, "t": 1712077323.027113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.991163765371457, "units": "Tflops", "t": 1712077324.4844277}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 297.947}}, "t": 1712077323.5346892}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.757}}, "t": 1712077324.0412679}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.976132631407506, "units": "Tflops", "t": 1712077325.6434276}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 304.074}}, "t": 1712077324.548626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.607}}, "t": 1712077325.0560722}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.777}}, "t": 1712077325.5645163}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.98503434655105, "units": "Tflops", "t": 1712077326.8019874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 306.027}}, "t": 1712077326.0718877}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 306.208}}, "t": 1712077326.583812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.98189299001999, "units": "Tflops", "t": 1712077327.9606433}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.818}}, "t": 1712077327.0939293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.884}}, "t": 1712077327.6030962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.984788157935984, "units": "Tflops", "t": 1712077329.11915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 301.936}}, "t": 1712077328.1097026}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.646}}, "t": 1712077328.6189256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.988825675807085, "units": "Tflops", "t": 1712077330.2773755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.836}}, "t": 1712077329.1254778}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 305.819}}, "t": 1712077329.6320713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.585}}, "t": 1712077330.1386988}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.969415979972116, "units": "Tflops", "t": 1712077331.436827}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.58}}, "t": 1712077330.6472082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 282.043}}, "t": 1712077331.1538332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.993811427982887, "units": "Tflops", "t": 1712077332.5947883}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 304.167}}, "t": 1712077331.6604373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 287.905}}, "t": 1712077332.1673846}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.97320497610451, "units": "Tflops", "t": 1712077333.7539961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 302.21}}, "t": 1712077332.6750336}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 289.371}}, "t": 1712077333.1827157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 295.312}}, "t": 1712077333.6903963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.997680610963616, "units": "Tflops", "t": 1712077334.9117212}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 309.114}}, "t": 1712077334.1995592}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 288.107}}, "t": 1712077334.7088869}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.979108054642264, "units": "Tflops", "t": 1712077336.0705419}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 307.374}}, "t": 1712077335.216628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 282.631}}, "t": 1712077335.724383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.979108054642264, "units": "Tflops", "t": 1712077337.2293873}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 306.107}}, "t": 1712077336.2321973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 306.688}}, "t": 1712077336.7402503}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.97066060167822, "units": "Tflops", "t": 1712077338.3887339}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 298.218}}, "t": 1712077337.2468696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 304.546}}, "t": 1712077337.7548234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 300.747}}, "t": 1712077338.2634094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.957785300498017, "units": "Tflops", "t": 1712077339.5489593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.78}}, "t": 1712077338.7711053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.929}}, "t": 1712077339.2785685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.97774127457208, "units": "Tflops", "t": 1712077340.7078683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 284.491}}, "t": 1712077339.785947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 289.471}}, "t": 1712077340.292524}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.922908796930226, "units": "Tflops", "t": 1712077341.8701537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 304.36}}, "t": 1712077340.8031745}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 308.882}}, "t": 1712077341.309846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 281.482}}, "t": 1712077341.81746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.99115985504207, "units": "Tflops", "t": 1712077343.0283046}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 305.432}}, "t": 1712077342.3276296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 298.242}}, "t": 1712077342.8380604}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.947768548783912, "units": "Tflops", "t": 1712077344.1890483}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.252}}, "t": 1712077343.3446116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 289.668}}, "t": 1712077343.8523786}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.963986814063126, "units": "Tflops", "t": 1712077345.348785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.917}}, "t": 1712077344.3638182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 296.182}}, "t": 1712077344.870451}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.880922208446094, "units": "Tflops", "t": 1712077346.5136507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 306.877}}, "t": 1712077345.3771138}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.976}}, "t": 1712077345.8848608}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.606}}, "t": 1712077346.391467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.88866323884558, "units": "Tflops", "t": 1712077347.6780813}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 309.89}}, "t": 1712077346.8993382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.147}}, "t": 1712077347.407996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.884834463596054, "units": "Tflops", "t": 1712077348.8426907}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 279.33}}, "t": 1712077347.9171476}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 303.093}}, "t": 1712077348.4256394}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.882309868367123, "units": "Tflops", "t": 1712077350.0074513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 308.062}}, "t": 1712077348.9322577}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.549}}, "t": 1712077349.4425132}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 301.928}}, "t": 1712077349.955026}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.89792446104727, "units": "Tflops", "t": 1712077351.1713152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.084}}, "t": 1712077350.4615912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 307.856}}, "t": 1712077350.970735}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.894076448898378, "units": "Tflops", "t": 1712077352.335372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.89}}, "t": 1712077351.483254}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.637}}, "t": 1712077351.9938357}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.881598618861094, "units": "Tflops", "t": 1712077353.5001707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 289.472}}, "t": 1712077352.500473}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.719}}, "t": 1712077353.0082679}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.89564411174342, "units": "Tflops", "t": 1712077354.6641371}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.016}}, "t": 1712077353.5165305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.129}}, "t": 1712077354.0231462}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.001}}, "t": 1712077354.5310075}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.919702582815287, "units": "Tflops", "t": 1712077355.826679}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.554}}, "t": 1712077355.0385358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.775}}, "t": 1712077355.5454686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.860224315467267, "units": "Tflops", "t": 1712077356.9928074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.265}}, "t": 1712077356.053057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.804}}, "t": 1712077356.5596998}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.895106045052607, "units": "Tflops", "t": 1712077358.1567872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.117}}, "t": 1712077357.067402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.488}}, "t": 1712077357.5739653}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.88}}, "t": 1712077358.0806093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.930971829754103, "units": "Tflops", "t": 1712077359.3186383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.815}}, "t": 1712077358.5872436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.674}}, "t": 1712077359.0961425}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.93936452063713, "units": "Tflops", "t": 1712077360.4799504}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.362}}, "t": 1712077359.6057162}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.866}}, "t": 1712077360.1133754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.92450066274071, "units": "Tflops", "t": 1712077361.6421306}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.451}}, "t": 1712077360.6219945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.556}}, "t": 1712077361.1305969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.168}}, "t": 1712077361.637259}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.86843472129579, "units": "Tflops", "t": 1712077362.807823}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.642}}, "t": 1712077362.143834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.826}}, "t": 1712077362.6511762}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.894273843978016, "units": "Tflops", "t": 1712077363.9718525}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.985}}, "t": 1712077363.1587524}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.881}}, "t": 1712077363.666399}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.90244419844568, "units": "Tflops", "t": 1712077365.1353765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.23}}, "t": 1712077364.176579}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 314.376}}, "t": 1712077364.6857278}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.923064089122487, "units": "Tflops", "t": 1712077366.2976499}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.037}}, "t": 1712077365.1948285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.288}}, "t": 1712077365.7039764}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.421}}, "t": 1712077366.213254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.928065860475584, "units": "Tflops", "t": 1712077367.459626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.531}}, "t": 1712077366.7251267}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.519}}, "t": 1712077367.236897}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.928364963178655, "units": "Tflops", "t": 1712077368.6215801}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.843}}, "t": 1712077367.7461922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.013}}, "t": 1712077368.252814}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.866335140501327, "units": "Tflops", "t": 1712077369.787351}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.29}}, "t": 1712077368.760692}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.891}}, "t": 1712077369.2688613}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.116}}, "t": 1712077369.775489}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.915946561796286, "units": "Tflops", "t": 1712077370.9500737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.206}}, "t": 1712077370.2832644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.041}}, "t": 1712077370.7909024}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.893449456591526, "units": "Tflops", "t": 1712077372.114177}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.23}}, "t": 1712077371.2983148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.283}}, "t": 1712077371.8061888}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.881931044026054, "units": "Tflops", "t": 1712077373.2789934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 302.416}}, "t": 1712077372.312764}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.898}}, "t": 1712077372.8229253}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.872032883553587, "units": "Tflops", "t": 1712077374.4444017}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 315.529}}, "t": 1712077373.3295374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.341}}, "t": 1712077373.8387468}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.381}}, "t": 1712077374.3480346}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.852583641765843, "units": "Tflops", "t": 1712077375.6110356}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.027}}, "t": 1712077374.8574302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.505}}, "t": 1712077375.365246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.822328582714757, "units": "Tflops", "t": 1712077376.7795346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.131}}, "t": 1712077375.8745782}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.706}}, "t": 1712077376.3811822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.886659706251443, "units": "Tflops", "t": 1712077377.9440253}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.632}}, "t": 1712077376.8878233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.558}}, "t": 1712077377.3952458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 280.486}}, "t": 1712077377.9026494}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.943188207051758, "units": "Tflops", "t": 1712077379.1051114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.052}}, "t": 1712077378.4092185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.042}}, "t": 1712077378.9157865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.856491870762856, "units": "Tflops", "t": 1712077380.271498}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.284}}, "t": 1712077379.4224355}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 287.812}}, "t": 1712077379.9319508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.83273991092385, "units": "Tflops", "t": 1712077381.4393294}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.142}}, "t": 1712077380.4425433}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.492}}, "t": 1712077380.9491093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.84636234078353, "units": "Tflops", "t": 1712077382.6063101}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.79}}, "t": 1712077381.4568737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.986}}, "t": 1712077381.9647448}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.956}}, "t": 1712077382.471332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.89598864535747, "units": "Tflops", "t": 1712077383.7703235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 303.579}}, "t": 1712077382.9798634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.312}}, "t": 1712077383.4868112}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.853631843987383, "units": "Tflops", "t": 1712077384.9368927}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.892}}, "t": 1712077383.994443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.797}}, "t": 1712077384.502745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.858080292669314, "units": "Tflops", "t": 1712077386.1031506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.119}}, "t": 1712077385.010339}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 284.305}}, "t": 1712077385.5190766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.3}}, "t": 1712077386.0293517}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.83426664360845, "units": "Tflops", "t": 1712077387.2709444}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 286.747}}, "t": 1712077386.5359921}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.33}}, "t": 1712077387.046058}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.91422425761594, "units": "Tflops", "t": 1712077388.433765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.867}}, "t": 1712077387.5526981}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 305.742}}, "t": 1712077388.0605266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.862314821391486, "units": "Tflops", "t": 1712077389.5997648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.547}}, "t": 1712077388.5683527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 282.626}}, "t": 1712077389.0761986}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.406}}, "t": 1712077389.5844991}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.85469943443248, "units": "Tflops", "t": 1712077390.7663093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.006}}, "t": 1712077390.0922668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 309.122}}, "t": 1712077390.599741}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.898547878122248, "units": "Tflops", "t": 1712077391.930084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 288.389}}, "t": 1712077391.106807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 281.945}}, "t": 1712077391.613375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.93784403273525, "units": "Tflops", "t": 1712077393.0914242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.522}}, "t": 1712077392.1207025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.242}}, "t": 1712077392.6343484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.884846063600598, "units": "Tflops", "t": 1712077394.2560513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.459}}, "t": 1712077393.1409867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 307.47}}, "t": 1712077393.6502435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.611}}, "t": 1712077394.1608934}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.86032844409441, "units": "Tflops", "t": 1712077395.4221985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.614}}, "t": 1712077394.67172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.063}}, "t": 1712077395.1783574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.924380292795476, "units": "Tflops", "t": 1712077396.584362}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.186}}, "t": 1712077395.6873908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 281.274}}, "t": 1712077396.1950583}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.883357512776975, "units": "Tflops", "t": 1712077397.7490659}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 303.173}}, "t": 1712077396.7038152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.832}}, "t": 1712077397.2129517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.962}}, "t": 1712077397.719542}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.923238795885464, "units": "Tflops", "t": 1712077398.9113703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.208}}, "t": 1712077398.226309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 306.607}}, "t": 1712077398.7329216}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.96719635480691, "units": "Tflops", "t": 1712077400.0709205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 289.368}}, "t": 1712077399.2398987}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 304.161}}, "t": 1712077399.7465804}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.921418121557824, "units": "Tflops", "t": 1712077401.233293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 291.611}}, "t": 1712077400.2571023}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.824}}, "t": 1712077400.7637312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.893348832071833, "units": "Tflops", "t": 1712077402.3973691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 279.705}}, "t": 1712077401.2721446}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.709}}, "t": 1712077401.780039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.042}}, "t": 1712077402.2889624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.972166851357798, "units": "Tflops", "t": 1712077403.5566478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 280.987}}, "t": 1712077402.7965493}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.221}}, "t": 1712077403.3041596}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.987332414894, "units": "Tflops", "t": 1712077404.71496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 308.445}}, "t": 1712077403.812741}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 290.45}}, "t": 1712077404.3193288}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.950127685495993, "units": "Tflops", "t": 1712077405.875577}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 309.301}}, "t": 1712077404.826258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 290.437}}, "t": 1712077405.3343413}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 279.138}}, "t": 1712077405.8409684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.96712224623274, "units": "Tflops", "t": 1712077407.0351598}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 304.159}}, "t": 1712077406.3485608}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.442}}, "t": 1712077406.8576987}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.993216910663154, "units": "Tflops", "t": 1712077408.1931806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 295.401}}, "t": 1712077407.3710613}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 305.238}}, "t": 1712077407.8802004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.973099597359415, "units": "Tflops", "t": 1712077409.352403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.723}}, "t": 1712077408.3894868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 285.177}}, "t": 1712077408.8994203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.99138274638699, "units": "Tflops", "t": 1712077410.5104814}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.889}}, "t": 1712077409.4103992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.65}}, "t": 1712077409.9209597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 302.511}}, "t": 1712077410.4275205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.991930221020205, "units": "Tflops", "t": 1712077411.6685834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.722}}, "t": 1712077410.9340382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.093}}, "t": 1712077411.4418168}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712077412.4395995, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/llama.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/llama.D0.data new file mode 100644 index 000000000..2e7a034ae --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/llama.D0.data @@ -0,0 +1,631 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 36, "power": 45.357, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712076924.998052, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712076925.0079544}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Pipeline\n", "pipe": "stderr"} +{"event": "line", "data": "Starting\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =10.143764019012451, total / elapsed =200.41870021715306 in_token_count =9 out_token_count =2024\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 200.41870021715306, "units": "Tok/s", "t": 1712076998.716492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27230.375, 81920.0], "load": 0, "temperature": 35, "power": 62.838}}, "t": 1712076988.6630177}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27252.375, 81920.0], "load": 0.18, "temperature": 36, "power": 63.025}}, "t": 1712076989.1705136}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27278.375, 81920.0], "load": 0.53, "temperature": 37, "power": 217.038}}, "t": 1712076989.6808262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27328.375, 81920.0], "load": 0.96, "temperature": 38, "power": 219.719}}, "t": 1712076990.1933587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27374.375, 81920.0], "load": 0.97, "temperature": 39, "power": 222.577}}, "t": 1712076990.7016249}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27554.375, 81920.0], "load": 0.96, "temperature": 39, "power": 224.342}}, "t": 1712076991.2115796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27614.375, 81920.0], "load": 0.97, "temperature": 40, "power": 223.806}}, "t": 1712076991.7192144}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27654.375, 81920.0], "load": 0.96, "temperature": 40, "power": 224.837}}, "t": 1712076992.2300007}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27714.375, 81920.0], "load": 0.96, "temperature": 40, "power": 221.056}}, "t": 1712076992.7383416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27754.375, 81920.0], "load": 0.96, "temperature": 40, "power": 223.825}}, "t": 1712076993.2484715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27814.375, 81920.0], "load": 0.96, "temperature": 40, "power": 227.214}}, "t": 1712076993.7567909}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27834.375, 81920.0], "load": 0.96, "temperature": 41, "power": 218.945}}, "t": 1712076994.2655935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27914.375, 81920.0], "load": 0.96, "temperature": 41, "power": 216.4}}, "t": 1712076994.7731984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27914.375, 81920.0], "load": 0.96, "temperature": 41, "power": 222.327}}, "t": 1712076995.2866879}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.96, "temperature": 41, "power": 222.858}}, "t": 1712076995.7954605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.97, "temperature": 42, "power": 224.106}}, "t": 1712076996.3050852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.97, "temperature": 42, "power": 222.23}}, "t": 1712076996.8133888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 42, "power": 224.239}}, "t": 1712076997.3242602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.96, "temperature": 42, "power": 231.487}}, "t": 1712076997.8326375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.96, "temperature": 43, "power": 227.303}}, "t": 1712076998.343296}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.301281929016113, total / elapsed =380.66264481325726 in_token_count =185 out_token_count =1833\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 380.66264481325726, "units": "Tok/s", "t": 1712077004.0177922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 296.968}}, "t": 1712076998.8758256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 43, "power": 225.913}}, "t": 1712076999.3872433}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 43, "power": 227.986}}, "t": 1712076999.8955493}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 43, "power": 231.035}}, "t": 1712077000.4068785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 230.428}}, "t": 1712077000.914623}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 232.889}}, "t": 1712077001.4255269}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 230.945}}, "t": 1712077001.9338295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 44, "power": 232.59}}, "t": 1712077002.4444304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 232.228}}, "t": 1712077002.9519699}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 229.549}}, "t": 1712077003.464068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 45, "power": 229.698}}, "t": 1712077003.9715762}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.773216962814331, total / elapsed =308.2730128775349 in_token_count =121 out_token_count =1967\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 308.2730128775349, "units": "Tok/s", "t": 1712077010.7910252}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.98, "temperature": 45, "power": 229.169}}, "t": 1712077004.4843094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 233.386}}, "t": 1712077004.9926116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 228.655}}, "t": 1712077005.5009358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 45, "power": 224.538}}, "t": 1712077006.0085108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.02}}, "t": 1712077006.519377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.361}}, "t": 1712077007.0276918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.227}}, "t": 1712077007.5377593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 229.571}}, "t": 1712077008.0461383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.718}}, "t": 1712077008.5587854}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 47, "power": 229.627}}, "t": 1712077009.0674236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 47, "power": 223.125}}, "t": 1712077009.5787327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 47, "power": 232.784}}, "t": 1712077010.08634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 229.86}}, "t": 1712077010.596971}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.612996339797974, total / elapsed =311.20537412287024 in_token_count =127 out_token_count =1931\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 311.20537412287024, "units": "Tok/s", "t": 1712077017.4040391}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 48, "power": 230.971}}, "t": 1712077011.1051986}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 235.344}}, "t": 1712077011.61562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 226.455}}, "t": 1712077012.1239812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 231.862}}, "t": 1712077012.6323142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 233.267}}, "t": 1712077013.1406302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 235.787}}, "t": 1712077013.6531157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 233.89}}, "t": 1712077014.161289}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 48, "power": 230.169}}, "t": 1712077014.6751626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 236.538}}, "t": 1712077015.187888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 49, "power": 236.161}}, "t": 1712077015.696339}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 233.52}}, "t": 1712077016.2080505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 49, "power": 237.084}}, "t": 1712077016.7164338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 49, "power": 232.244}}, "t": 1712077017.227582}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.362201690673828, total / elapsed =212.6636517544092 in_token_count =6 out_token_count =1985\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 212.6636517544092, "units": "Tok/s", "t": 1712077026.7662604}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 236.25}}, "t": 1712077017.7361279}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 49, "power": 232.235}}, "t": 1712077018.2455752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.94, "temperature": 50, "power": 233.598}}, "t": 1712077018.7532315}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 232.253}}, "t": 1712077019.2651622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 236.93}}, "t": 1712077019.7735038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 236.155}}, "t": 1712077020.2863476}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 235.889}}, "t": 1712077020.7939498}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 228.61}}, "t": 1712077021.303867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 50, "power": 237.99}}, "t": 1712077021.8151917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 238.11}}, "t": 1712077022.325006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 51, "power": 236.946}}, "t": 1712077022.8332937}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 51, "power": 230.374}}, "t": 1712077023.3424761}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 231.61}}, "t": 1712077023.8506193}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 231.967}}, "t": 1712077024.3609152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 51, "power": 228.327}}, "t": 1712077024.8693233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 51, "power": 241.374}}, "t": 1712077025.3818033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 52, "power": 238.242}}, "t": 1712077025.890193}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 52, "power": 236.543}}, "t": 1712077026.4031544}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.6670026779174805, total / elapsed =597.2179985545356 in_token_count =256 out_token_count =1934\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 597.2179985545356, "units": "Tok/s", "t": 1712077030.4332798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 54, "power": 303.674}}, "t": 1712077026.913884}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 52, "power": 232.031}}, "t": 1712077027.4261255}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 52, "power": 228.938}}, "t": 1712077027.9345155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 52, "power": 234.831}}, "t": 1712077028.4460652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 52, "power": 238.455}}, "t": 1712077028.9543102}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 53, "power": 241.228}}, "t": 1712077029.4647264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 53, "power": 239.598}}, "t": 1712077029.9724667}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.7476789951324463, total / elapsed =1291.9992780647224 in_token_count =340 out_token_count =1918\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1291.9992780647224, "units": "Tok/s", "t": 1712077032.180981}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 53, "power": 238.227}}, "t": 1712077030.4800365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.99, "temperature": 53, "power": 237.169}}, "t": 1712077030.9893632}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 53, "power": 244.685}}, "t": 1712077031.4977155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 242.639}}, "t": 1712077032.007775}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.349722146987915, total / elapsed =289.39869527879955 in_token_count =95 out_token_count =2032\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 289.39869527879955, "units": "Tok/s", "t": 1712077039.5307202}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 236.158}}, "t": 1712077032.5173497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 234.351}}, "t": 1712077033.0257845}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 242.061}}, "t": 1712077033.5366666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 54, "power": 244.591}}, "t": 1712077034.04434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 247.438}}, "t": 1712077034.5543199}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 238.485}}, "t": 1712077035.0626655}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 238.937}}, "t": 1712077035.5749645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 241.002}}, "t": 1712077036.0832703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 242.234}}, "t": 1712077036.594024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 242.633}}, "t": 1712077037.1023319}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 240.705}}, "t": 1712077037.6136827}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 239.681}}, "t": 1712077038.122026}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 242.024}}, "t": 1712077038.632818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 236.571}}, "t": 1712077039.1409974}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.412617206573486, total / elapsed =220.02382063871423 in_token_count =5 out_token_count =2066\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 220.02382063871423, "units": "Tok/s", "t": 1712077048.943358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 55, "power": 229.427}}, "t": 1712077039.6493304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 238.342}}, "t": 1712077040.156812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 236.089}}, "t": 1712077040.6686745}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 240.344}}, "t": 1712077041.1770523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 55, "power": 236.799}}, "t": 1712077041.688002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 55, "power": 237.261}}, "t": 1712077042.1962936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 240.961}}, "t": 1712077042.704597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 56, "power": 239.859}}, "t": 1712077043.2144172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 246.777}}, "t": 1712077043.7232552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 244.985}}, "t": 1712077044.2340555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 235.963}}, "t": 1712077044.7415862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 243.199}}, "t": 1712077045.2512512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 242.245}}, "t": 1712077045.76066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 244.207}}, "t": 1712077046.2732058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 244.137}}, "t": 1712077046.781587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 244.131}}, "t": 1712077047.29467}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 238.817}}, "t": 1712077047.8031044}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 236.317}}, "t": 1712077048.312418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 247.031}}, "t": 1712077048.8201714}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.7340760231018066, total / elapsed =563.1915330564397 in_token_count =253 out_token_count =1850\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 563.1915330564397, "units": "Tok/s", "t": 1712077052.6774495}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.98, "temperature": 57, "power": 236.028}}, "t": 1712077049.3311498}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 241.04}}, "t": 1712077049.8398747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 239.926}}, "t": 1712077050.3485436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 246.374}}, "t": 1712077050.8572392}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 57, "power": 243.698}}, "t": 1712077051.3686786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 57, "power": 239.484}}, "t": 1712077051.877047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 238.033}}, "t": 1712077052.3909776}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.099916696548462, total / elapsed =681.6312200752608 in_token_count =282 out_token_count =1831\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 681.6312200752608, "units": "Tok/s", "t": 1712077055.777384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 60, "power": 305.004}}, "t": 1712077052.9005234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 248.398}}, "t": 1712077053.4102662}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 241.597}}, "t": 1712077053.919587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 248.901}}, "t": 1712077054.4286094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 248.828}}, "t": 1712077054.9384027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 58, "power": 246.998}}, "t": 1712077055.4479783}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.6785972118377686, total / elapsed =569.7824141373912 in_token_count =256 out_token_count =1840\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 569.7824141373912, "units": "Tok/s", "t": 1712077059.4559987}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 61, "power": 299.658}}, "t": 1712077055.955819}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 250.26}}, "t": 1712077056.4646406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 58, "power": 250.794}}, "t": 1712077056.9743443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 59, "power": 239.45}}, "t": 1712077057.48686}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 249.554}}, "t": 1712077057.9951653}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 247.034}}, "t": 1712077058.5079925}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 241.22}}, "t": 1712077059.0161667}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.39958119392395, total / elapsed =212.6690496904466 in_token_count =5 out_token_count =1994\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 212.6690496904466, "units": "Tok/s", "t": 1712077068.8556008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 59, "power": 250.64}}, "t": 1712077059.526569}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.93, "temperature": 59, "power": 238.784}}, "t": 1712077060.034873}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 247.337}}, "t": 1712077060.5458949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 59, "power": 243.86}}, "t": 1712077061.0536392}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 250.232}}, "t": 1712077061.564295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 244.394}}, "t": 1712077062.0726852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 59, "power": 251.124}}, "t": 1712077062.585282}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 239.521}}, "t": 1712077063.0929062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 59, "power": 248.057}}, "t": 1712077063.6058283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 248.686}}, "t": 1712077064.114153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 250.861}}, "t": 1712077064.6254284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 251.245}}, "t": 1712077065.1330726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 249.063}}, "t": 1712077065.6438518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 242.914}}, "t": 1712077066.152168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 60, "power": 241.384}}, "t": 1712077066.660989}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 241.173}}, "t": 1712077067.1686792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 252.494}}, "t": 1712077067.6824338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 251.748}}, "t": 1712077068.1907816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 60, "power": 250.953}}, "t": 1712077068.7032185}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.5333330631256104, total / elapsed =1351.9567599842333 in_token_count =349 out_token_count =1724\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1351.9567599842333, "units": "Tok/s", "t": 1712077070.3889477}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 1.0, "temperature": 63, "power": 298.881}}, "t": 1712077069.2150197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 256.021}}, "t": 1712077069.7245393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 255.156}}, "t": 1712077070.2358518}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =2.981513738632202, total / elapsed =772.4264255969933 in_token_count =287 out_token_count =2016\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 772.4264255969933, "units": "Tok/s", "t": 1712077073.370476}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 1.0, "temperature": 63, "power": 297.8}}, "t": 1712077070.7478192}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 256.517}}, "t": 1712077071.2593775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 255.708}}, "t": 1712077071.7707248}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 256.301}}, "t": 1712077072.2863202}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 251.569}}, "t": 1712077072.7944968}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 61, "power": 248.039}}, "t": 1712077073.3056884}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.337865114212036, total / elapsed =219.10789832314356 in_token_count =7 out_token_count =2039\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 219.10789832314356, "units": "Tok/s", "t": 1712077082.7083616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 61, "power": 256.259}}, "t": 1712077073.8139553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 61, "power": 254.062}}, "t": 1712077074.324509}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 61, "power": 245.754}}, "t": 1712077074.8325636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.95, "temperature": 61, "power": 253.299}}, "t": 1712077075.3419423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 247.681}}, "t": 1712077075.8507304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 253.52}}, "t": 1712077076.362146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 257.923}}, "t": 1712077076.8705842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 246.558}}, "t": 1712077077.3817143}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 62, "power": 255.142}}, "t": 1712077077.8927941}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 256.727}}, "t": 1712077078.4028542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 62, "power": 257.302}}, "t": 1712077078.9126284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 62, "power": 256.593}}, "t": 1712077079.4223073}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 255.153}}, "t": 1712077079.9321141}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 253.693}}, "t": 1712077080.4409366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 246.981}}, "t": 1712077080.948501}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 62, "power": 258.648}}, "t": 1712077081.4613638}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 63, "power": 257.42}}, "t": 1712077081.9697726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 63, "power": 256.17}}, "t": 1712077082.48259}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.1987826824188232, total / elapsed =1825.1848580137982 in_token_count =363 out_token_count =1825\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1825.1848580137982, "units": "Tok/s", "t": 1712077083.9071574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 302.668}}, "t": 1712077082.9919574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 257.015}}, "t": 1712077083.5002096}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.360902070999146, total / elapsed =216.43213278309116 in_token_count =7 out_token_count =2019\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 216.43213278309116, "units": "Tok/s", "t": 1712077093.2680798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 228.772}}, "t": 1712077084.0085957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 63, "power": 249.24}}, "t": 1712077084.519348}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 63, "power": 247.319}}, "t": 1712077085.027666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 253.18}}, "t": 1712077085.5382407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 252.613}}, "t": 1712077086.0458567}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 257.612}}, "t": 1712077086.557521}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 250.177}}, "t": 1712077087.0658932}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 63, "power": 251.754}}, "t": 1712077087.5790062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 253.479}}, "t": 1712077088.0873787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 258.49}}, "t": 1712077088.5971737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 245.067}}, "t": 1712077089.1055946}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 247.452}}, "t": 1712077089.61578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 255.314}}, "t": 1712077090.1241825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 254.541}}, "t": 1712077090.634713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 251.056}}, "t": 1712077091.1422677}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 254.212}}, "t": 1712077091.65464}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 261.404}}, "t": 1712077092.1630156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 260.322}}, "t": 1712077092.675888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 259.375}}, "t": 1712077093.187532}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.6552915573120117, total / elapsed =1223.3494401967162 in_token_count =344 out_token_count =1681\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1223.3494401967162, "units": "Tok/s", "t": 1712077094.9233859}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 65, "power": 305.602}}, "t": 1712077093.6974325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 258.964}}, "t": 1712077094.2057726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 259.262}}, "t": 1712077094.7170568}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.735776901245117, total / elapsed =302.8603871400348 in_token_count =122 out_token_count =1918\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 302.8603871400348, "units": "Tok/s", "t": 1712077101.6591804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 65, "power": 242.883}}, "t": 1712077095.2282736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 260.724}}, "t": 1712077095.7365975}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 256.398}}, "t": 1712077096.2479167}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 253.015}}, "t": 1712077096.756201}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 259.385}}, "t": 1712077097.2644558}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 260.834}}, "t": 1712077097.7742095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 261.829}}, "t": 1712077098.2867599}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 261.797}}, "t": 1712077098.7943554}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 257.263}}, "t": 1712077099.3072329}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 262.32}}, "t": 1712077099.8155463}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 261.33}}, "t": 1712077100.327143}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 259.641}}, "t": 1712077100.8346636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 257.334}}, "t": 1712077101.3453913}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.376733779907227, total / elapsed =208.60142197823524 in_token_count =6 out_token_count =1950\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 208.60142197823524, "units": "Tok/s", "t": 1712077111.0359323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 65, "power": 259.524}}, "t": 1712077101.8537173}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 255.343}}, "t": 1712077102.3643446}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 261.329}}, "t": 1712077102.8727148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 261.487}}, "t": 1712077103.3838775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 250.746}}, "t": 1712077103.8915277}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 258.779}}, "t": 1712077104.4042647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 263.045}}, "t": 1712077104.9125872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 256.57}}, "t": 1712077105.4216917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 258.342}}, "t": 1712077105.9300387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 258.881}}, "t": 1712077106.4407473}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 66, "power": 261.719}}, "t": 1712077106.9490478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 261.701}}, "t": 1712077107.4596784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 264.014}}, "t": 1712077107.9678414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 262.896}}, "t": 1712077108.4795127}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 262.673}}, "t": 1712077108.9872966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 260.643}}, "t": 1712077109.5003147}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 259.18}}, "t": 1712077110.0116837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 264.342}}, "t": 1712077110.52004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.338}}, "t": 1712077111.0284092}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.454042434692383, total / elapsed =262.1396399496938 in_token_count =91 out_token_count =1863\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 262.1396399496938, "units": "Tok/s", "t": 1712077118.4899912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 67, "power": 259.961}}, "t": 1712077111.5362024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 253.052}}, "t": 1712077112.0461195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 255.804}}, "t": 1712077112.5547445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 260.209}}, "t": 1712077113.063104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 260.451}}, "t": 1712077113.575485}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 263.274}}, "t": 1712077114.0838072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 256.366}}, "t": 1712077114.596054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 259.88}}, "t": 1712077115.1043634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 253.745}}, "t": 1712077115.6155698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 255.264}}, "t": 1712077116.12388}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 258.281}}, "t": 1712077116.6325793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.545}}, "t": 1712077117.1409705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.976}}, "t": 1712077117.6516128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.431}}, "t": 1712077118.1599905}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.846051216125488, total / elapsed =348.78243871276953 in_token_count =162 out_token_count =1877\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 348.78243871276953, "units": "Tok/s", "t": 1712077124.3360605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 232.784}}, "t": 1712077118.7225287}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 265.613}}, "t": 1712077119.2333412}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 269.511}}, "t": 1712077119.7446232}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 268.822}}, "t": 1712077120.252583}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 270.094}}, "t": 1712077120.7638667}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 262.892}}, "t": 1712077121.274015}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 265.972}}, "t": 1712077121.783775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 264.437}}, "t": 1712077122.2931328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 263.491}}, "t": 1712077122.8026392}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 260.802}}, "t": 1712077123.3119967}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 269.439}}, "t": 1712077123.82158}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 268.243}}, "t": 1712077124.333879}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.276447296142578, total / elapsed =394.20463870085723 in_token_count =186 out_token_count =1894\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 394.20463870085723, "units": "Tok/s", "t": 1712077129.6125243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 267.349}}, "t": 1712077124.8477187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 267.877}}, "t": 1712077125.3589501}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 269.314}}, "t": 1712077125.8702157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 269.52}}, "t": 1712077126.3799691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 269.618}}, "t": 1712077126.8899188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 267.782}}, "t": 1712077127.3988347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 270.565}}, "t": 1712077127.9089699}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 270.95}}, "t": 1712077128.4172628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 270.946}}, "t": 1712077128.927256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 267.246}}, "t": 1712077129.4367208}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.864799976348877, total / elapsed =298.91621126175625 in_token_count =117 out_token_count =1935\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 298.91621126175625, "units": "Tok/s", "t": 1712077136.4773426}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 256.768}}, "t": 1712077129.9444873}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 269.566}}, "t": 1712077130.456895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 259.187}}, "t": 1712077130.9652357}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.622}}, "t": 1712077131.478324}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 263.837}}, "t": 1712077131.9860687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.017}}, "t": 1712077132.4980223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 267.676}}, "t": 1712077133.006613}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.808}}, "t": 1712077133.5158017}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 268.92}}, "t": 1712077134.024262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 269.13}}, "t": 1712077134.5348558}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 265.43}}, "t": 1712077135.043271}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 263.005}}, "t": 1712077135.5555665}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 275.159}}, "t": 1712077136.06371}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.365670204162598, total / elapsed =215.57453508267352 in_token_count =6 out_token_count =2013\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 215.57453508267352, "units": "Tok/s", "t": 1712077145.8430338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 251.528}}, "t": 1712077136.5744483}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 266.814}}, "t": 1712077137.0818756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 266.688}}, "t": 1712077137.5924528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 270.304}}, "t": 1712077138.10077}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 265.715}}, "t": 1712077138.6116982}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 269.025}}, "t": 1712077139.1199605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 70, "power": 272.951}}, "t": 1712077139.629561}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 262.893}}, "t": 1712077140.1379366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.083}}, "t": 1712077140.646874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 268.944}}, "t": 1712077141.1551952}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 260.215}}, "t": 1712077141.6674006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 261.865}}, "t": 1712077142.175712}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 263.322}}, "t": 1712077142.6850648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 268.791}}, "t": 1712077143.1927297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 266.425}}, "t": 1712077143.7035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 263.986}}, "t": 1712077144.2118268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 265.806}}, "t": 1712077144.7222881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 277.388}}, "t": 1712077145.2306936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.722}}, "t": 1712077145.743145}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.434460878372192, total / elapsed =266.865349412452 in_token_count =91 out_token_count =1893\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 266.865349412452, "units": "Tok/s", "t": 1712077153.2775137}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 70, "power": 270.909}}, "t": 1712077146.250626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 270.014}}, "t": 1712077146.762114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 267.199}}, "t": 1712077147.2704577}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.914}}, "t": 1712077147.7796578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 262.221}}, "t": 1712077148.288019}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 266.527}}, "t": 1712077148.7983856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 270.045}}, "t": 1712077149.3082378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 273.251}}, "t": 1712077149.8170698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 274.223}}, "t": 1712077150.3264296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 273.059}}, "t": 1712077150.8358314}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 273.754}}, "t": 1712077151.3464525}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 274.234}}, "t": 1712077151.8572943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 271.586}}, "t": 1712077152.3686512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 272.48}}, "t": 1712077152.8774369}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.292436361312866, total / elapsed =224.16080336822526 in_token_count =9 out_token_count =2074\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 224.16080336822526, "units": "Tok/s", "t": 1712077162.56997}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 219.784}}, "t": 1712077153.3882813}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 263.883}}, "t": 1712077153.8972597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 272.381}}, "t": 1712077154.407827}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.207}}, "t": 1712077154.9160748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 272.188}}, "t": 1712077155.4248896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 268.489}}, "t": 1712077155.932514}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.768}}, "t": 1712077156.4451005}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 264.402}}, "t": 1712077156.9534082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 71, "power": 262.537}}, "t": 1712077157.466297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 266.913}}, "t": 1712077157.9776962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 266.81}}, "t": 1712077158.4865434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.504}}, "t": 1712077158.9940698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.545}}, "t": 1712077159.503849}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 272.069}}, "t": 1712077160.0137076}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 271.299}}, "t": 1712077160.5224233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 273.062}}, "t": 1712077161.030317}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.824}}, "t": 1712077161.5394044}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 270.817}}, "t": 1712077162.0477192}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.586}}, "t": 1712077162.560269}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.320425033569336, total / elapsed =578.5403918410394 in_token_count =273 out_token_count =1648\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 578.5403918410394, "units": "Tok/s", "t": 1712077165.8904104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 71, "power": 267.79}}, "t": 1712077163.0762973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.752}}, "t": 1712077163.5893717}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 261.754}}, "t": 1712077164.0977495}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 275.42}}, "t": 1712077164.608556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.25}}, "t": 1712077165.116231}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 275.488}}, "t": 1712077165.627081}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.4148550033569336, total / elapsed =605.5894021758103 in_token_count =269 out_token_count =1799\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 605.5894021758103, "units": "Tok/s", "t": 1712077169.3052857}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 298.053}}, "t": 1712077166.1348414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.506}}, "t": 1712077166.6453552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.233}}, "t": 1712077167.1536598}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.216}}, "t": 1712077167.6672573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.779}}, "t": 1712077168.17551}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 273.452}}, "t": 1712077168.6873498}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 269.087}}, "t": 1712077169.1958363}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.696740627288818, total / elapsed =392.39978237075167 in_token_count =213 out_token_count =1630\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 392.39978237075167, "units": "Tok/s", "t": 1712077174.0020456}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 247.569}}, "t": 1712077169.705012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.458}}, "t": 1712077170.2134132}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.072}}, "t": 1712077170.7226486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 271.216}}, "t": 1712077171.2305584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 274.143}}, "t": 1712077171.74097}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 273.068}}, "t": 1712077172.2492068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.826}}, "t": 1712077172.7587242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 274.026}}, "t": 1712077173.2671452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 272.955}}, "t": 1712077173.7810025}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.253382205963135, total / elapsed =221.75675383619566 in_token_count =11 out_token_count =2041\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 221.75675383619566, "units": "Tok/s", "t": 1712077183.2554452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 71, "power": 271.295}}, "t": 1712077174.2938197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 273.149}}, "t": 1712077174.8066704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 274.239}}, "t": 1712077175.315022}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.742}}, "t": 1712077175.8268209}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.385}}, "t": 1712077176.3351636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 268.205}}, "t": 1712077176.8443575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 274.333}}, "t": 1712077177.3539078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 265.142}}, "t": 1712077177.8624861}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 265.128}}, "t": 1712077178.371919}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 272.842}}, "t": 1712077178.881152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 272.589}}, "t": 1712077179.3923628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 274.207}}, "t": 1712077179.9036546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 275.129}}, "t": 1712077180.4170952}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 274.267}}, "t": 1712077180.9254632}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 273.646}}, "t": 1712077181.4355805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 269.257}}, "t": 1712077181.9442914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 267.051}}, "t": 1712077182.4546003}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.895}}, "t": 1712077182.962811}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.158296823501587, total / elapsed =298.45914425328385 in_token_count =148 out_token_count =1690\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 298.45914425328385, "units": "Tok/s", "t": 1712077189.4137588}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 297.072}}, "t": 1712077183.4728684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.207}}, "t": 1712077183.9815722}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 274.638}}, "t": 1712077184.4926016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.804}}, "t": 1712077185.0004954}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.911}}, "t": 1712077185.5132625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.294}}, "t": 1712077186.0216515}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.428}}, "t": 1712077186.5326214}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.105}}, "t": 1712077187.0409584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.831}}, "t": 1712077187.5517237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 269.23}}, "t": 1712077188.0597699}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.589}}, "t": 1712077188.5687995}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.014}}, "t": 1712077189.076331}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.021287679672241, total / elapsed =291.54196400845314 in_token_count =110 out_token_count =1937\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 291.54196400845314, "units": "Tok/s", "t": 1712077196.4350672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 309.98}}, "t": 1712077189.5855331}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 275.912}}, "t": 1712077190.0938396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.552}}, "t": 1712077190.606417}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 271.333}}, "t": 1712077191.114859}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.974}}, "t": 1712077191.6246328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.891}}, "t": 1712077192.1329181}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 263.091}}, "t": 1712077192.6435707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 268.478}}, "t": 1712077193.1519084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 271.11}}, "t": 1712077193.6617048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 270.823}}, "t": 1712077194.169342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 270.546}}, "t": 1712077194.6817408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.186}}, "t": 1712077195.1900756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.116}}, "t": 1712077195.703848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.904}}, "t": 1712077196.2150166}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.361604690551758, total / elapsed =199.0043439753701 in_token_count =6 out_token_count =1857\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 199.0043439753701, "units": "Tok/s", "t": 1712077205.7966914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.008}}, "t": 1712077196.724898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.232}}, "t": 1712077197.234872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.646}}, "t": 1712077197.7445264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 275.411}}, "t": 1712077198.2540538}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.402}}, "t": 1712077198.7628279}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 274.941}}, "t": 1712077199.272262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 262.252}}, "t": 1712077199.7817655}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 273.443}}, "t": 1712077200.290887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 276.985}}, "t": 1712077200.802585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 267.878}}, "t": 1712077201.3167758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.035}}, "t": 1712077201.828062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.841}}, "t": 1712077202.3356338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 274.224}}, "t": 1712077202.8467946}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 273.772}}, "t": 1712077203.356591}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 270.949}}, "t": 1712077203.8661747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 269.939}}, "t": 1712077204.3769643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 265.174}}, "t": 1712077204.8853042}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.597}}, "t": 1712077205.396111}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.2115976810455322, total / elapsed =632.3955245038071 in_token_count =278 out_token_count =1753\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 632.3955245038071, "units": "Tok/s", "t": 1712077209.008307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 321.446}}, "t": 1712077205.9039085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 275.602}}, "t": 1712077206.4125881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 280.078}}, "t": 1712077206.9216664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 276.414}}, "t": 1712077207.4309158}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 269.521}}, "t": 1712077207.9402602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.701}}, "t": 1712077208.4489868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.802}}, "t": 1712077208.957864}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.364078044891357, total / elapsed =211.55312786833824 in_token_count =6 out_token_count =1975\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 211.55312786833824, "units": "Tok/s", "t": 1712077218.3724022}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 271.01}}, "t": 1712077209.4672763}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 277.486}}, "t": 1712077209.9773746}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.093}}, "t": 1712077210.490947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 278.764}}, "t": 1712077210.998834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.772}}, "t": 1712077211.5135221}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.273}}, "t": 1712077212.0246577}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 280.693}}, "t": 1712077212.5330274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 277.257}}, "t": 1712077213.0422006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.04}}, "t": 1712077213.5518956}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.208}}, "t": 1712077214.0613317}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.821}}, "t": 1712077214.5709658}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.59}}, "t": 1712077215.0801702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 274.455}}, "t": 1712077215.589858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.263}}, "t": 1712077216.0997112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.718}}, "t": 1712077216.6120064}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 274.82}}, "t": 1712077217.1197004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.088}}, "t": 1712077217.627298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 280.515}}, "t": 1712077218.1350749}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.664726972579956, total / elapsed =1337.1562043895976 in_token_count =344 out_token_count =1882\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1337.1562043895976, "units": "Tok/s", "t": 1712077220.0371437}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 75, "power": 286.919}}, "t": 1712077218.6444383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.797}}, "t": 1712077219.1558807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 276.814}}, "t": 1712077219.664499}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.145230531692505, total / elapsed =294.1822507582684 in_token_count =105 out_token_count =1997\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 294.1822507582684, "units": "Tok/s", "t": 1712077227.1823947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 75, "power": 234.44}}, "t": 1712077220.175976}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 281.389}}, "t": 1712077220.6855218}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 268.454}}, "t": 1712077221.1933346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 270.239}}, "t": 1712077221.705785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 274.925}}, "t": 1712077222.2141185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 279.692}}, "t": 1712077222.7255073}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 277.084}}, "t": 1712077223.2338326}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 277.75}}, "t": 1712077223.741903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 278.637}}, "t": 1712077224.2501929}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.028}}, "t": 1712077224.760983}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 279.606}}, "t": 1712077225.269349}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.522}}, "t": 1712077225.7794144}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 276.219}}, "t": 1712077226.2871957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 274.255}}, "t": 1712077226.7991114}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.142279148101807, total / elapsed =222.26405112798378 in_token_count =17 out_token_count =2015\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 222.26405112798378, "units": "Tok/s", "t": 1712077236.324694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 74, "power": 272.351}}, "t": 1712077227.3074791}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.982}}, "t": 1712077227.8190534}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 277.424}}, "t": 1712077228.3266635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 276.504}}, "t": 1712077228.8378255}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 274.974}}, "t": 1712077229.3460875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 269.452}}, "t": 1712077229.8551915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 279.105}}, "t": 1712077230.364273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 273.938}}, "t": 1712077230.8748405}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.208}}, "t": 1712077231.3843439}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 279.811}}, "t": 1712077231.8939593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 282.078}}, "t": 1712077232.4064298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 274.706}}, "t": 1712077232.9147422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.508}}, "t": 1712077233.4275587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.88}}, "t": 1712077233.9351544}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 282.93}}, "t": 1712077234.4445524}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 281.467}}, "t": 1712077234.952787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.326}}, "t": 1712077235.4635131}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 279.088}}, "t": 1712077235.971866}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.6274635791778564, total / elapsed =474.3419288866617 in_token_count =216 out_token_count =1979\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 474.3419288866617, "units": "Tok/s", "t": 1712077240.952175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 75, "power": 296.652}}, "t": 1712077236.4792805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 281.954}}, "t": 1712077236.9895573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.774}}, "t": 1712077237.4992352}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 284.565}}, "t": 1712077238.0082476}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 281.561}}, "t": 1712077238.5158758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 282.147}}, "t": 1712077239.026364}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 280.004}}, "t": 1712077239.5361457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 278.26}}, "t": 1712077240.0461094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 278.118}}, "t": 1712077240.5559726}, "pipe": "data"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712077241.82962, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b-multinode.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b-multinode.data new file mode 100644 index 000000000..3b45f0015 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b.data new file mode 100644 index 000000000..8e5013d68 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-1_3b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b-multinode.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b-multinode.data new file mode 100644 index 000000000..cccd5c098 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b.data new file mode 100644 index 000000000..8e5013d68 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/opt-6_7b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/reformer.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/reformer.D0.data new file mode 100644 index 000000000..23619f512 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/reformer.D0.data @@ -0,0 +1,291 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 106.069, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078112.970693, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712078112.9860363}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 5.877161979675293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.84889030456543}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 286.263}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.813286781311035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.790142059326172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.774377822875977}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 74, "power": 250.749}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.76326847076416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.755447864532471}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.38314549082414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.75070333480835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.534}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.842465612384196, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.745652198791504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.63145888626222, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.739924907684326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.68464590587847, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.735757827758789}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 75, "power": 319.986}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.85780509668206, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7321648597717285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.762408387688765, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.729575157165527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.72479019546835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.726365566253662}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 249.038}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.755593411803034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.723445415496826}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.831244570885815, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.720085620880127}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 247.779}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.89020690222747, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.716990947723389}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.81798156705002, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.713686943054199}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.7296009861144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.711978912353516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 284.1}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.692559881338724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.708196640014648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.658595850231464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7056193351745605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.57613715519568, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.702970504760742}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 75, "power": 257.581}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.590725761697435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.701535224914551}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.65477032906796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.698465347290039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.62810092492661, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6952409744262695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 311.354}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.61688596614815, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.693437576293945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.587803623145646, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.690184116363525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.53808015152288, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.687097549438477}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 292.283}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.602905049185736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.684821605682373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.60110854953425, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6812567710876465}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 257.582}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.540157914588704, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.678829669952393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.54124119881017, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.675886631011963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.586025382781784, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.67181396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 286.584}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.530624329785496, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6685662269592285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.524061459611396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.664484977722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.52944741216129, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.661380767822266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.599}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.61860682635961, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.656714916229248}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.5812057995563, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.652444839477539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.561579935613366, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.648081302642822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 76, "power": 327.363}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.57395835085758, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6426286697387695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.642632169324386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.636715888977051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.56922587454374, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.631279468536377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 276.736}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.49676511523986, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.624204158782959}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.504875316254754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.617888450622559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 294.356}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.5479032050464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.609550952911377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.597072283192254, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.600521087646484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.50102088837983, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.592146873474121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 310.488}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.502168636235126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.585984706878662}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.569347800147355, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.573252201080322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.57391487109742, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.566571235656738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 319.918}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.57587548502977, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.552077770233154}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.58684800758302, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.538307189941406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.6049095948316, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.525749206542969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 282.145}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.59864445674683, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.51253080368042}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.55511203093286, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.498420238494873}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.47464814084089, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.486696243286133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 322.228}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.55849644232166, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4735894203186035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.55699371971329, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.468693733215332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 287.737}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.54037085673302, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.46619176864624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.537931904178045, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.457893371582031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.494300638209424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.445857048034668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 311.649}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.48301397106765, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.424015998840332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.576752056001375, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.412559986114502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.56804997370876, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.405117034912109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 77, "power": 323.327}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.52122714709638, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.394566535949707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.494944285711945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.387679100036621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.570233429364485, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.370873928070068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 76, "power": 285.293}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.589726686012376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 76, "power": 301.482}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712078188.512789, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/regnet_y_128gf.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/regnet_y_128gf.D0.data new file mode 100644 index 000000000..21d55738c --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/regnet_y_128gf.D0.data @@ -0,0 +1,404 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 101.054, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077741.768871, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712077741.785265}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 66, "power": 60.471}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3952.375, 81920.0], "load": 0, "temperature": 66, "power": 93.463}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00616455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94366455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92230224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.94, "temperature": 71, "power": 295.037}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02435302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96478271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 72, "power": 301.568}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.98551842782294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05963134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.44691104484649, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03607177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 73, "power": 323.234}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.56104566371464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.64288594764278, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.239990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 73, "power": 294.725}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.78456530737698, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.17510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5086669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.10062097113837, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4044189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9764404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 74, "power": 218.053}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.45869703157555, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06768798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.32774359566, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 83.45148977677785, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 74, "power": 301.636}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04644775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14105224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.72912826553359, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.45257568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 66.62103209244626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.43798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08331298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 318.633}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.82531964013918, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04290771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.34880028862328, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.25390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.08548985374642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 286.569}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16436767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.26544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.19990384564295, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1256103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.53026260049819, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 337.914}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.10318274591698, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.73469147345757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.43881534569796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 76, "power": 307.61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22601318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.7413024948707, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05743408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06365966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.12117892741466, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 305.139}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.32281494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.79228287366135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 81.94335015751093, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.38861083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0684814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.56454854232919, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.174}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.23063421632908, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.20361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15069580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.74113021364218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.054931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 299.898}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.48645317773433, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.13518332940372, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.1342006002105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 307.189}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1290283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.64287334170248, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7943115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.95451562098192, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00726318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 76, "power": 306.915}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 65.00787395176391, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90716552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01605224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.05649538510784, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.97, "temperature": 76, "power": 314.279}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.83291793384016, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 80.42923125283956, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84466552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 86.02788063200617, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 77, "power": 295.41}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89312744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.86631084853728, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9566650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.52137661800205, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.016}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1663818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.01757209606161, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2611083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05230712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.47794871984293, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1234130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 78.74383658456956, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 77, "power": 296.705}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.09311072363006, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.8300009407885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 298.61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.50946321846355, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96356201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.86461140283208, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.43861316609807, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 306.71}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.85908370621165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.8063557629032, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 78, "power": 326.798}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.55281723342608, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02862548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8790283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.26970529071299, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13043212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.62476504552366, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 317.391}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0330810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97674560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.38286047887179, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 80.08832455780012, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 330.027}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04339599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 86.02331163464297, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.39404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.51878076804775, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.37728952473498, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.789}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09002685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.77026642514522, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0643310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.38854037479985, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 78, "power": 304.065}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 78.39624291395576, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99871826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.24513712055776, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.56261801281862, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 310.599}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 310.599}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712077833.3021524, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0.data new file mode 100644 index 000000000..aa6e9b4c9 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0.data @@ -0,0 +1,304 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 70, "power": 99.706, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078410.803939, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712078410.8207672}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23996.375, 81920.0], "load": 1.0, "temperature": 70, "power": 284.718}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.440s, 74.42/s (3.440s, 74.42/s) LR: 1.000e-05 Data: 1.245 (1.245)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27176.375, 81920.0], "load": 1.0, "temperature": 71, "power": 312.674}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938842296600342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27176.375, 81920.0], "load": 0.94, "temperature": 72, "power": 322.148}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.362s, 707.33/s (0.584s, 438.65/s) LR: 1.000e-05 Data: 0.000 (0.094)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.135 (1.135) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.256 (0.255) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 707.0748652460082, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27660.375, 81920.0], "load": 0.5, "temperature": 68, "power": 94.45}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4266.375, 81920.0], "load": 1.0, "temperature": 68, "power": 96.145}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963781833648682}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.407s, 182.00/s (1.407s, 182.00/s) LR: 2.001e-02 Data: 1.042 (1.042)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.906595230102539}, "pipe": "data"} +{"event": "data", "data": {"rate": 679.20529280488, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27194.375, 81920.0], "load": 0.99, "temperature": 72, "power": 321.097}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 638.6815951304361, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.927788257598877}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.7818045761736, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902334213256836}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.4338815218415, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895504951477051}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27194.375, 81920.0], "load": 0.99, "temperature": 72, "power": 325.818}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.0208675993767, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976802825927734}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.977 (6.92) Time: 0.365s, 701.13/s (0.441s, 579.85/s) LR: 2.001e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.8470 (6.8470) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.229) Loss: 6.5615 (6.8342) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.2355)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.26152839631, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27678.375, 81920.0], "load": 0, "temperature": 68, "power": 94.104}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27678.375, 81920.0], "load": 0.03, "temperature": 68, "power": 94.158}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833113193511963}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.833 (6.83) Time: 1.402s, 182.66/s (1.402s, 182.66/s) LR: 4.001e-02 Data: 1.036 (1.036)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.856740474700928}, "pipe": "data"} +{"event": "data", "data": {"rate": 686.0018190892456, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28162.375, 81920.0], "load": 0.99, "temperature": 71, "power": 256.65}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.474316824814, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901230335235596}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.2816104679656, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94493293762207}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.0104375311398, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9797515869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28162.375, 81920.0], "load": 0.99, "temperature": 72, "power": 315.93}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.2230583436107, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.940389156341553}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.940 (6.92) Time: 0.366s, 698.50/s (0.442s, 579.01/s) LR: 4.001e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.983 (0.983) Loss: 6.8053 (6.8053) Acc@1: 0.7812 ( 0.7812) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.234) Loss: 6.3810 (6.8034) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.0112668574657, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28646.375, 81920.0], "load": 0.55, "temperature": 71, "power": 301.018}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28646.375, 81920.0], "load": 0, "temperature": 68, "power": 94.158}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.861546039581299}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.862 (6.86) Time: 1.419s, 180.38/s (1.419s, 180.38/s) LR: 6.000e-02 Data: 1.054 (1.054)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 574.9147911994536, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.910906791687012}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.3708341350615, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954673767089844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29130.375, 81920.0], "load": 0.92, "temperature": 72, "power": 296.694}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.048402262069, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.991860389709473}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.8276175417132, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.054144382476807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29130.375, 81920.0], "load": 0.94, "temperature": 73, "power": 261.696}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.4347028431811, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03437614440918}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.034 (6.96) Time: 0.366s, 699.12/s (0.444s, 576.57/s) LR: 6.000e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.969 (0.969) Loss: 6.9324 (6.9324) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.227) Loss: 6.3954 (6.8244) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1143)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.2562199211644, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29614.375, 81920.0], "load": 0.9, "temperature": 71, "power": 92.878}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29614.375, 81920.0], "load": 0, "temperature": 68, "power": 93.866}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.864370346069336}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.374s, 186.26/s (1.374s, 186.26/s) LR: 8.000e-02 Data: 1.008 (1.008)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 633.9749328466077, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925772666931152}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.5410220554338, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979971885681152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30098.375, 81920.0], "load": 0.94, "temperature": 73, "power": 309.73}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.7090113287061, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047486305236816}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.240783444034, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0692853927612305}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.1234848272869, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03903341293335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30098.375, 81920.0], "load": 0.99, "temperature": 73, "power": 304.283}}}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.039 (7.00) Time: 0.368s, 695.92/s (0.442s, 579.76/s) LR: 8.000e-02 Data: 0.001 (0.075)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.968 (0.968) Loss: 6.8013 (6.8013) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.232) Loss: 6.5208 (6.8458) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 1.2597)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 695.1499309214444, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30582.375, 81920.0], "load": 0.49, "temperature": 72, "power": 322.828}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30582.375, 81920.0], "load": 0, "temperature": 68, "power": 93.963}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870933532714844}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.871 (6.87) Time: 1.461s, 175.25/s (1.461s, 175.25/s) LR: 9.993e-02 Data: 1.095 (1.095)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 617.4417904279894, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.974578857421875}, "pipe": "data"} +{"event": "data", "data": {"rate": 682.7613338014693, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0785064697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31066.375, 81920.0], "load": 0.97, "temperature": 73, "power": 321.738}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 696.4129160798198, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1159820556640625}, "pipe": "data"} +{"event": "data", "data": {"rate": 594.1094227114396, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087150573730469}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.0559025190126, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.095391273498535}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.095 (7.04) Time: 0.367s, 698.06/s (0.447s, 573.04/s) LR: 9.993e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31066.375, 81920.0], "load": 1.0, "temperature": 74, "power": 307.973}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.952 (0.952) Loss: 6.9660 (6.9660) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.227) Loss: 6.3292 (6.8915) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 0.9690)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 697.274328954285, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31550.375, 81920.0], "load": 0.95, "temperature": 73, "power": 263.384}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.892255783081055}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.892 (6.89) Time: 1.440s, 177.72/s (1.440s, 177.72/s) LR: 9.990e-02 Data: 1.074 (1.074)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31886.375, 81920.0], "load": 0.78, "temperature": 71, "power": 305.17}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 533.4449433516061, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007454872131348}, "pipe": "data"} +{"event": "data", "data": {"rate": 667.5955354506842, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.977649688720703}, "pipe": "data"} +{"event": "data", "data": {"rate": 648.5725892702532, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32034.375, 81920.0], "load": 0.99, "temperature": 73, "power": 308.155}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015753746032715}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.525135994952, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076228141784668}, "pipe": "data"} +{"event": "data", "data": {"rate": 595.2128546036964, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142056465148926}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.142 (7.03) Time: 0.368s, 695.64/s (0.446s, 574.31/s) LR: 9.990e-02 Data: 0.000 (0.078)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 623.5005476530862, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.005 (1.005) Loss: 6.8032 (6.8032) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.234) Loss: 6.4867 (6.8581) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32034.375, 81920.0], "load": 0.99, "temperature": 71, "power": 96.91}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.5290766066321, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32518.375, 81920.0], "load": 0.9, "temperature": 70, "power": 296.207}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.864059925079346}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.468s, 174.39/s (1.468s, 174.39/s) LR: 9.987e-02 Data: 1.101 (1.101)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32854.375, 81920.0], "load": 0.99, "temperature": 72, "power": 294.527}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928898811340332}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.3868346950447, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994691848754883}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.5208853443247, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33002.375, 81920.0], "load": 0.99, "temperature": 73, "power": 301.93}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032052516937256}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.4052911726154, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0799455642700195}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.508541577737, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.042294025421143}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.042 (6.99) Time: 0.368s, 695.99/s (0.448s, 571.67/s) LR: 9.987e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 626.9884529569433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.957 (0.957) Loss: 6.8256 (6.8256) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.226) Loss: 6.5559 (6.8334) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33002.375, 81920.0], "load": 0.48, "temperature": 70, "power": 96.731}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.8750108665911, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33486.375, 81920.0], "load": 0, "temperature": 72, "power": 324.307}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.889812469482422}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.890 (6.89) Time: 1.404s, 182.30/s (1.404s, 182.30/s) LR: 9.982e-02 Data: 1.038 (1.038)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 637.3029577815294, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33970.375, 81920.0], "load": 0.94, "temperature": 73, "power": 289.482}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915931701660156}, "pipe": "data"} +{"event": "data", "data": {"rate": 696.7677306062536, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943663597106934}, "pipe": "data"} +{"event": "data", "data": {"rate": 594.5455805506919, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.988678932189941}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.5034218475955, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33970.375, 81920.0], "load": 0.94, "temperature": 73, "power": 223.147}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94257116317749}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.5969795861934, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018918037414551}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.019 (6.94) Time: 0.369s, 692.84/s (0.444s, 576.78/s) LR: 9.982e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 623.3629640368691, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.946 (0.946) Loss: 6.8033 (6.8033) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.225) Loss: 6.6473 (6.8228) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-8.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33970.375, 81920.0], "load": 0, "temperature": 70, "power": 96.035}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 692.5510732339538, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34454.375, 81920.0], "load": 0.94, "temperature": 73, "power": 305.064}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901410102844238}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.901 (6.90) Time: 1.474s, 173.70/s (1.474s, 173.70/s) LR: 9.978e-02 Data: 1.106 (1.106)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34938.375, 81920.0], "load": 0.93, "temperature": 73, "power": 304.756}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856259822845459}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.4951658429918, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.910282135009766}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.7440089592776, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907938003540039}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.0867586660061, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34938.375, 81920.0], "load": 0.93, "temperature": 74, "power": 305.63}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029355049133301}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.5607649355844, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.980498313903809}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 6.980 (6.92) Time: 0.368s, 696.03/s (0.448s, 571.99/s) LR: 9.978e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 624.7825707842533, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.963 (0.963) Loss: 6.7888 (6.7888) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.227) Loss: 6.4438 (6.8076) Acc@1: 0.0000 ( 0.3391) Acc@5: 3.1250 ( 1.2355)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0/20240402-172015-resnet152-224/checkpoint-9.pth.tar', 0.3391472868217054)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 571.2661482212146, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35422.375, 81920.0], "load": 0, "temperature": 73, "power": 247.331}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35422.375, 81920.0], "load": 0.55, "temperature": 70, "power": 97.133}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832240104675293}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.832 (6.83) Time: 1.394s, 183.65/s (1.394s, 183.65/s) LR: 9.973e-02 Data: 1.027 (1.027)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 665.8569343484285, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885162830352783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35906.375, 81920.0], "load": 0.92, "temperature": 73, "power": 300.585}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 696.3805943225001, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.871856689453125}, "pipe": "data"} +{"event": "data", "data": {"rate": 593.0976791067272, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896815299987793}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.5346879696415, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712078541.038392, "return_code": -15}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152.D0.data new file mode 100644 index 000000000..a96f18118 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet152.D0.data @@ -0,0 +1,305 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.551, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078276.538776, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712078276.5554135}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24052.375, 81920.0], "load": 1.0, "temperature": 72, "power": 312.946}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.501s, 73.12/s (3.501s, 73.12/s) LR: 1.000e-05 Data: 1.316 (1.316)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 287.845}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938842296600342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27182.375, 81920.0], "load": 0.94, "temperature": 73, "power": 318.408}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.362s, 707.91/s (0.587s, 436.42/s) LR: 1.000e-05 Data: 0.000 (0.098)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.098 (1.098) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.240 (0.249) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 707.9541880300716, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27666.375, 81920.0], "load": 0, "temperature": 70, "power": 97.036}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4978.375, 81920.0], "load": 1.0, "temperature": 69, "power": 98.775}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963781833648682}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.363s, 187.88/s (1.363s, 187.88/s) LR: 2.001e-02 Data: 0.997 (0.997)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.906595230102539}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.9048738088012, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27244.375, 81920.0], "load": 0.91, "temperature": 73, "power": 301.233}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.927788257598877}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.9586897230192, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902334213256836}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.1864389862641, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895504951477051}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.9994235211116, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27244.375, 81920.0], "load": 0.92, "temperature": 74, "power": 279.396}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976802825927734}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.977 (6.92) Time: 0.366s, 698.58/s (0.439s, 582.81/s) LR: 2.001e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 630.6763576998, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.006 (1.006) Loss: 6.8470 (6.8470) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.232) Loss: 6.5615 (6.8342) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.2355)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.4095060299613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27730.375, 81920.0], "load": 0.84, "temperature": 73, "power": 278.193}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27730.375, 81920.0], "load": 0.02, "temperature": 69, "power": 95.645}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833113193511963}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.833 (6.83) Time: 1.391s, 184.08/s (1.391s, 184.08/s) LR: 4.001e-02 Data: 1.025 (1.025)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.856740474700928}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.7290187676313, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28214.375, 81920.0], "load": 0.93, "temperature": 74, "power": 303.545}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901230335235596}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.2212650529127, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94493293762207}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.3351932974532, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9797515869140625}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.65949349532, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28214.375, 81920.0], "load": 0.93, "temperature": 74, "power": 301.451}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.940389156341553}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.940 (6.92) Time: 0.367s, 696.73/s (0.442s, 579.46/s) LR: 4.001e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 626.2283912249218, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.960 (0.960) Loss: 6.8053 (6.8053) Acc@1: 0.7812 ( 0.7812) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.228) Loss: 6.3810 (6.8034) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.7547428530818, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28698.375, 81920.0], "load": 0.91, "temperature": 71, "power": 112.35}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28698.375, 81920.0], "load": 0, "temperature": 69, "power": 95.158}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.861546039581299}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.862 (6.86) Time: 1.363s, 187.80/s (1.363s, 187.80/s) LR: 6.000e-02 Data: 0.997 (0.997)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 538.5215973739532, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.910906791687012}, "pipe": "data"} +{"event": "data", "data": {"rate": 697.3763586312554, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954673767089844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29182.375, 81920.0], "load": 0.96, "temperature": 73, "power": 254.949}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 683.0387515712258, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.991860389709473}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.5464197513131, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.054144382476807}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.0368784046575, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29182.375, 81920.0], "load": 0.92, "temperature": 74, "power": 302.369}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03437614440918}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.034 (6.96) Time: 0.368s, 696.13/s (0.441s, 581.11/s) LR: 6.000e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 626.3523568231118, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.004 (1.004) Loss: 6.9324 (6.9324) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.235) Loss: 6.3954 (6.8244) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1143)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.1512481479284, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29666.375, 81920.0], "load": 0, "temperature": 73, "power": 336.232}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29666.375, 81920.0], "load": 0, "temperature": 69, "power": 94.664}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.864370346069336}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.412s, 181.26/s (1.412s, 181.26/s) LR: 8.000e-02 Data: 1.045 (1.045)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.925772666931152}, "pipe": "data"} +{"event": "data", "data": {"rate": 685.7684773436212, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 696.4598174842167, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979971885681152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30150.375, 81920.0], "load": 0.99, "temperature": 72, "power": 318.327}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 593.343613150496, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047486305236816}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.3405544189388, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0692853927612305}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.3821764078899, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03903341293335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30150.375, 81920.0], "load": 0.99, "temperature": 74, "power": 260.613}}}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.039 (7.00) Time: 0.367s, 697.38/s (0.444s, 576.54/s) LR: 8.000e-02 Data: 0.001 (0.077)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.978 (0.978) Loss: 6.8013 (6.8013) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.228) Loss: 6.5208 (6.8458) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 1.2597)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 695.8855858517969, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30634.375, 81920.0], "load": 0.56, "temperature": 73, "power": 314.793}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870933532714844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30970.375, 81920.0], "load": 0.01, "temperature": 69, "power": 253.158}}}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.871 (6.87) Time: 1.413s, 181.18/s (1.413s, 181.18/s) LR: 9.993e-02 Data: 1.046 (1.046)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.742391472384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.974578857421875}, "pipe": "data"} +{"event": "data", "data": {"rate": 595.0913965193677, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0785064697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31118.375, 81920.0], "load": 1.0, "temperature": 73, "power": 179.321}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.5611905017515, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1159820556640625}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.7851915182639, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087150573730469}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.0399266090767, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.095391273498535}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.095 (7.04) Time: 0.368s, 696.15/s (0.444s, 576.61/s) LR: 9.993e-02 Data: 0.001 (0.077)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.010 (1.010) Loss: 6.9660 (6.9660) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.236) Loss: 6.3292 (6.8915) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 0.9690)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31118.375, 81920.0], "load": 1.0, "temperature": 72, "power": 297.346}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.0092799417304, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31602.375, 81920.0], "load": 0.94, "temperature": 73, "power": 305.34}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.892255783081055}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.892 (6.89) Time: 1.391s, 184.03/s (1.391s, 184.03/s) LR: 9.990e-02 Data: 1.025 (1.025)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31938.375, 81920.0], "load": 0.95, "temperature": 72, "power": 305.382}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 602.8188939411352, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007454872131348}, "pipe": "data"} +{"event": "data", "data": {"rate": 657.6032740875111, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.977649688720703}, "pipe": "data"} +{"event": "data", "data": {"rate": 655.8522315460139, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32086.375, 81920.0], "load": 1.0, "temperature": 74, "power": 305.829}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015753746032715}, "pipe": "data"} +{"event": "data", "data": {"rate": 696.5809807330335, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076228141784668}, "pipe": "data"} +{"event": "data", "data": {"rate": 595.7863043876953, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142056465148926}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.142 (7.03) Time: 0.368s, 694.85/s (0.443s, 577.67/s) LR: 9.990e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.994 (0.994) Loss: 6.8032 (6.8032) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.230) Loss: 6.4867 (6.8581) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32086.375, 81920.0], "load": 1.0, "temperature": 71, "power": 98.12}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 696.2036233078143, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32570.375, 81920.0], "load": 0.77, "temperature": 70, "power": 96.341}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.864059925079346}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.400s, 182.84/s (1.400s, 182.84/s) LR: 9.987e-02 Data: 1.034 (1.034)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33054.375, 81920.0], "load": 0.99, "temperature": 73, "power": 252.497}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 536.2713023667408, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928898811340332}, "pipe": "data"} +{"event": "data", "data": {"rate": 684.5163674031223, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994691848754883}, "pipe": "data"} +{"event": "data", "data": {"rate": 633.0877114856806, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032052516937256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33054.375, 81920.0], "load": 0.99, "temperature": 72, "power": 305.566}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.4763387217931, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0799455642700195}, "pipe": "data"} +{"event": "data", "data": {"rate": 592.7090214989917, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.042294025421143}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.042 (6.99) Time: 0.368s, 696.07/s (0.443s, 577.81/s) LR: 9.987e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 624.5549192316796, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.966 (0.966) Loss: 6.8256 (6.8256) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.234) Loss: 6.5559 (6.8334) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33054.375, 81920.0], "load": 0, "temperature": 70, "power": 96.828}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 582.3516158983447, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33538.375, 81920.0], "load": 0, "temperature": 73, "power": 96.145}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.889812469482422}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.890 (6.89) Time: 1.427s, 179.42/s (1.427s, 179.42/s) LR: 9.982e-02 Data: 1.059 (1.059)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34022.375, 81920.0], "load": 0.92, "temperature": 73, "power": 302.043}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915931701660156}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.8461687789592, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943663597106934}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.023210998383, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.988678932189941}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34022.375, 81920.0], "load": 0.93, "temperature": 73, "power": 296.011}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.8598450954569, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94257116317749}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.0919263451244, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018918037414551}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.019 (6.94) Time: 0.368s, 695.98/s (0.445s, 574.76/s) LR: 9.982e-02 Data: 0.000 (0.078)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 625.3929211193839, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.8033 (6.8033) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.228) Loss: 6.6473 (6.8228) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-8.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34022.375, 81920.0], "load": 0, "temperature": 70, "power": 96.536}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 677.5896069972074, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34506.375, 81920.0], "load": 0.95, "temperature": 73, "power": 201.862}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901410102844238}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.901 (6.90) Time: 1.391s, 183.99/s (1.391s, 183.99/s) LR: 9.978e-02 Data: 1.025 (1.025)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34990.375, 81920.0], "load": 0.99, "temperature": 72, "power": 148.201}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856259822845459}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.6672239126855, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.910282135009766}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.1281290417545, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907938003540039}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.6640415754832, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34990.375, 81920.0], "load": 0.99, "temperature": 73, "power": 238.836}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029355049133301}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.4166658224798, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.980498313903809}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 6.980 (6.92) Time: 0.369s, 694.44/s (0.443s, 578.30/s) LR: 9.978e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 624.281044459124, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.7888 (6.7888) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.231) Loss: 6.4438 (6.8076) Acc@1: 0.0000 ( 0.3391) Acc@5: 3.1250 ( 1.2355)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0/20240402-171801-resnet152-224/checkpoint-9.pth.tar', 0.3391472868217054)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34990.375, 81920.0], "load": 0, "temperature": 70, "power": 95.84}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 654.9093100646307, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35474.375, 81920.0], "load": 0.89, "temperature": 74, "power": 305.818}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832240104675293}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.832 (6.83) Time: 1.443s, 177.44/s (1.443s, 177.44/s) LR: 9.973e-02 Data: 1.075 (1.075)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35958.375, 81920.0], "load": 1.0, "temperature": 73, "power": 305.448}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 696.1347333752168, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885162830352783}, "pipe": "data"} +{"event": "data", "data": {"rate": 590.2609266641402, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.871856689453125}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.5251300640239, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896815299987793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35958.375, 81920.0], "load": 0.99, "temperature": 73, "power": 191.682}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.4410149950778, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/majaguma.2024-04-02_16:55:21.895752/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712078408.1757667, "return_code": -15}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet50.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet50.D0.data new file mode 100644 index 000000000..bb6c1c2fb --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/resnet50.D0.data @@ -0,0 +1,2207 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 72, "power": 103.998, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077415.156174, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712077415.17404}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0189208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07318115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0589599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06268310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1090087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.185791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99810791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03021240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0460205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06439208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 71, "power": 248.614}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01129150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03814697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12310791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9730224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06427001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12481689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0045166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0550537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06060791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04132080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97540283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0970458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90789794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88079833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90875244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 71, "power": 189.197}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84259033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95147705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.760498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0108642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94647216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90313720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92645263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1074.3753222434734, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92181396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94342041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.001708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9281005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9161376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05157470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1064.3505246204525, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95526123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.929931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14642333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.76, "temperature": 71, "power": 169.166}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92083740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9735107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01495361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0101318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1100.7928174952224, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8753662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86322021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7818603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01275634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81646728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 749.784248703782, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88751220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99383544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92449951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97406005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02996826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.1792667489865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86309814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 72, "power": 302.831}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9158935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9371337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.9057013433771, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96405029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00738525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96612548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95794677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1092.4736621669952, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9923095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85980224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79498291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84417724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 742.0001887308397, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83636474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87982177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78558349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8668212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 71, "power": 208.557}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84478759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1089.0248017421088, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03179931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91180419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0032958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9293212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93719482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91351318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9639892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0362548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1080.9651501465464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0242919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9608154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9676513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92315673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85491943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8707275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1085.314641797184, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05633544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75518798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.6, "temperature": 69, "power": 95.053}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8743896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75701904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76666259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 759.4980301884566, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8651123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90704345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87725830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8424072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84649658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78570556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8453369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1077.4718864361746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88116455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7698974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0291748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9876708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91876220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0474853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9068603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1078.7447551130078, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87408447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93695068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 72, "power": 240.996}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90765380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03533935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.141934764114, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91595458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00433349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10699462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 777.8909364648911, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83807373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78363037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9781494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87750244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1076.3290604725014, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01373291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 72, "power": 285.977}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9207763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1071.3704503553447, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01202392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9251708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91827392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90899658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9385986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1072.7630317340265, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85308837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95916748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02166748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1108.5237842026875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.766845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7198486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 72, "power": 263.396}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85162353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88653564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93023681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 848.6058564655643, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98834228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07659912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91363525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96173095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1079.9318185840405, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88519287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05010986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88616943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92608642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1086.0219293641771, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07244873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8892822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0374755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.82, "temperature": 72, "power": 305.679}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01385498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1112.3136179629682, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88031005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7603759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7813720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98284912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 772.6754224547993, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8736572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85198974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04522705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9031982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96685791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.852181328063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87628173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8714599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.73, "temperature": 72, "power": 186.388}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9698486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96661376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95611572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95672607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1066.7693327564218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97576904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10626220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02752685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02606201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.869873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1102.1396729131875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7694091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 730.0934192302099, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95062255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0047607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9041748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 73, "power": 194.123}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84368896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97747802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87054443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93328857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.9025120579545, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87518310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95941162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90655517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91143798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0428466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96258544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0184326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.6564897382048, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92169189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9564208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0125732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79156494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89605712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95782470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92376708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09295654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1086.6782710455707, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [9, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.37, "temperature": 70, "power": 96.312}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.6580810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8775634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86627197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 728.4239562689767, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77252197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.939697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94830322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7374267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86505126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9329833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.937255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1082.5029515044914, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03314208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.873779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0120849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.349988821429, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97467041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88360595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 73, "power": 293.125}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97418212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.708238023532, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98223876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01593017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97027587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.952880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9888916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [10, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7283935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1105.5787146080977, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8455810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73211669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75860595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91790771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87445068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86419677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8148193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90692138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98175048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 854.6778213161937, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92071533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85479736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02642822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.76, "temperature": 72, "power": 297.708}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1074.2054224177707, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92889404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85137939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88055419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0123291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9727783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.3931295326697, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87127685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [11, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1100.7683618512228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7322998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84112548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.03, "temperature": 72, "power": 302.852}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95172119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9088134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 800.9970210169854, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83197021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85174560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9654541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9080810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88189697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87200927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1074.5376454382877, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82464599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02545166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94940185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8238525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90972900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9095458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05096435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1073.7104016430374, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9444580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99859619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 73, "power": 307.607}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90521240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91668701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02362060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [12, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1098.5822783581073, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82745361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83087158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82574462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.767822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94927978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 752.1941901190678, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8480224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87884521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88458251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88140869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94097900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9276123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1082.3164629453127, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91339111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8748779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84796142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 73, "power": 295.625}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06488037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.5691999764088, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87677001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0169677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00152587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96636962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1094.725606594049, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89569091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06988525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [13, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8389892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 734.9186436066431, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89910888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 73, "power": 212.224}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01068115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86041259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.851318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98089599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82415771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0321044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1060.5722638745788, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8673095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80596923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88262939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1063.7712180327794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9637451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95635986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90606689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9393310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95416259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89556884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93426513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1092.0524242951435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99908447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [14, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.48, "temperature": 70, "power": 309.283}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 746.7580365228025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83612060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84588623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00128173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97076416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77191162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.391555645523, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8599853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9468994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01458740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83660888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8387451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1071.262253603257, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9302978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97637939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86175537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.75, "temperature": 72, "power": 187.62}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95196533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9525146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1091.9746676731168, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79437255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87786865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [15, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1105.9526522522765, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77789306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89959716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72613525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89373779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.822998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75787353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83685302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80035400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9439697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93365478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 844.5575533909964, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8887939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01885986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02154541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95733642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 72, "power": 302.931}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9488525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91619873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00286865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82684326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.2062222243835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92095947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93646240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8726806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0084228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97686767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93267822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88104248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0472412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.4621792282105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 73, "power": 307.934}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712077485.6945655, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/rwkv.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/rwkv.D0.data new file mode 100644 index 000000000..a49ee583b --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/rwkv.D0.data @@ -0,0 +1,462 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 45, "power": 72.501, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712079323.433281, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712079323.4496007}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "[2024-04-02 17:35:25,680] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"} +{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-02-17-35-27', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"} +{"event": "line", "data": "Building token list...\n", "pipe": "stderr"} +{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"} +{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "ninja: no work to do.\n", "pipe": "stdout"} +{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"} +{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"} +{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"} +{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"} +{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"} +{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "pipe": "stderr"} +{"event": "line", "data": "Installed CUDA version 11.5 does not match the version torch was compiled with 11.8 but since the APIs are compatible, accepting this combination\n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module fused_adam...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "[1/2] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "FAILED: multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"} +{"event": "line", "data": " 435 | function(_Functor&& __f)\n", "pipe": "stdout"} +{"event": "line", "data": " | ^ \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"} +{"event": "line", "data": " 530 | operator=(_Functor&& __f)\n", "pipe": "stdout"} +{"event": "line", "data": " | ^ \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"} +{"event": "line", "data": "ninja: build stopped: subcommand failed.\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1574.375, 81920.0], "load": 0.02, "temperature": 45, "power": 69.155}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 44, "power": 48.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.666}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.472}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.459}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.667}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.667}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.569}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 45, "power": 68.667}}}, "pipe": "data"} +{"event": "error", "data": {"type": "RuntimeError", "message": "Error building extension 'fused_adam'"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2100, in _run_ninja_build\n", "pipe": "stderr"} +{"event": "line", "data": " subprocess.run(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/subprocess.py\", line 526, in run\n", "pipe": "stderr"} +{"event": "line", "data": " raise CalledProcessError(retcode, process.args,\n", "pipe": "stderr"} +{"event": "line", "data": "subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "The above exception was the direct cause of the following exception:\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"} +{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"} +{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"} +{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"} +{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"} +{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"} +{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 501, in jit_load\n", "pipe": "stderr"} +{"event": "line", "data": " op_module = load(name=self.name,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1308, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return _jit_compile(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1710, in _jit_compile\n", "pipe": "stderr"} +{"event": "line", "data": " _write_ninja_file_and_build_library(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1823, in _write_ninja_file_and_build_library\n", "pipe": "stderr"} +{"event": "line", "data": " _run_ninja_build(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2116, in _run_ninja_build\n", "pipe": "stderr"} +{"event": "line", "data": " raise RuntimeError(message) from e\n", "pipe": "stderr"} +{"event": "line", "data": "RuntimeError: Error building extension 'fused_adam'\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712079350.9924312, "return_code": 1}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/stargan.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/stargan.D0.data new file mode 100644 index 000000000..07f49a8e6 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/stargan.D0.data @@ -0,0 +1,682 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 26208.375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.31992645263671876}, "temperature": 73, "power": 101.22, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078897.253788, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712078897.2705288}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.089741706848145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [53428.375, 81920.0], "load": 1.0, "temperature": 70, "power": 100.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [19484.375, 81920.0], "load": 1.0, "temperature": 71, "power": 139.052}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.317051887512207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.644302368164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.848727703094482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 8.103803739898403, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.238846778869629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 62.85356381808514, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29784.375, 81920.0], "load": 0.99, "temperature": 75, "power": 259.957}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.218193292617798}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 6.441458616433847, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31678.375, 81920.0], "load": 1.0, "temperature": 73, "power": 330.939}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8139472007751465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.672729730606079}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4927239418029785}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:12], Iteration [10/200000], D/loss_real: -1.2637, D/loss_fake: 0.0986, D/loss_cls: 3.2617, D/loss_gp: 0.0396, G/loss_fake: -0.8299, G/loss_rec: 0.5488, G/loss_cls: 3.4044\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.362861183804156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7979466915130615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.576117515563965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.983277797698975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.448564052581787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.224003791809082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.55333479758454, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.802}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98637580871582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.404041767120361}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.85794672508046, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.861074924468994}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.728839635848999}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.7439486980438232}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:16], Iteration [20/200000], D/loss_real: 0.0041, D/loss_fake: -0.3487, D/loss_cls: 3.3985, D/loss_gp: 0.0690, G/loss_fake: 0.3632, G/loss_rec: 0.5314, G/loss_cls: 3.4477\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.500532049751826, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.070382118225098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.543978691101074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.056460380554199}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.90916633605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.284873962402344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.51226369361384, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.96, "temperature": 75, "power": 301.146}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.841668128967285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.380107402801514}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.784126623726365, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.945100784301758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.168574333190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5670511722564697}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:19], Iteration [30/200000], D/loss_real: -2.5096, D/loss_fake: 1.2898, D/loss_cls: 3.4684, D/loss_gp: 0.0318, G/loss_fake: -1.3278, G/loss_rec: 0.5435, G/loss_cls: 3.3680\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.74442549058152, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.532721757888794}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.50101637840271}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7967890501022339}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.92, "temperature": 76, "power": 252.409}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.220597505569458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7649186253547668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.3234911378094, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5370545387268066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.27542781829834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.885388307150418, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.046738386154175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9461417198181152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2733052968978882}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:23], Iteration [40/200000], D/loss_real: -4.1574, D/loss_fake: 0.3679, D/loss_cls: 3.4969, D/loss_gp: 0.0566, G/loss_fake: -0.2052, G/loss_rec: 0.5576, G/loss_cls: 3.4414\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 43.95507960091843, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7831344604492188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 76, "power": 277.74}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.49488839507102966}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.29238027334213257}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.019767314195632935}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": -0.31068718433380127}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.34541434629127, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8902745842933655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2322750091552734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.916079293093336, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.40593504905700684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4870221018791199}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7905704975128174}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:26], Iteration [50/200000], D/loss_real: -5.1061, D/loss_fake: 1.1584, D/loss_cls: 4.3119, D/loss_gp: 0.0426, G/loss_fake: -0.2823, G/loss_rec: 0.5366, G/loss_cls: 3.4462", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 42.69129624313872, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5668803453445435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.287}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9807621240615845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9384143352508545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9174190163612366}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8964967727661133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.43898717115566, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0413246154785156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9203179478645325}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.873926246503125, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6606730222702026}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5495665073394775}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4471652507781982}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:30], Iteration [60/200000], D/loss_real: -4.0746, D/loss_fake: 1.6119, D/loss_cls: 3.8760, D/loss_gp: 0.0034, G/loss_fake: -2.0161, G/loss_rec: 0.5201, G/loss_cls: 3.4230\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.99, "temperature": 76, "power": 358.845}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.71429917315668, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.663923978805542}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.712101936340332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4543116092681885}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.023350238800049}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.411146640777588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.334419212054094, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6416218280792236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8717973232269287}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.885113111151245, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.103079080581665}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2871930599212646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 76, "power": 187.765}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1995863914489746}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:33], Iteration [70/200000], D/loss_real: -6.7271, D/loss_fake: 3.5354, D/loss_cls: 3.5678, D/loss_gp: 0.0823, G/loss_fake: -2.5282, G/loss_rec: 0.5270, G/loss_cls: 3.6515\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.960157776343216, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.961525797843933}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2567733526229858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.284398078918457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.349828839302063}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7634912133216858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.24667062950582, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7425315380096436}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.935045838356018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 76, "power": 319.931}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.710782283722406, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.51625657081604}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3935335874557495}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3159737586975098}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:37], Iteration [80/200000], D/loss_real: -5.4053, D/loss_fake: 3.2105, D/loss_cls: 3.3761, D/loss_gp: 0.0135, G/loss_fake: -3.0750, G/loss_rec: 0.5277, G/loss_cls: 3.4031\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.716048480804396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3885138034820557}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4682352542877197}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1441540718078613}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0262908935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9325515031814575}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.360431991072, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 77, "power": 305.725}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0223560333251953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.106226921081543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.722320865767585, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6552891731262207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4856133460998535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3280460834503174}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:40], Iteration [90/200000], D/loss_real: -4.9272, D/loss_fake: 3.9632, D/loss_cls: 3.2878, D/loss_gp: 0.0004, G/loss_fake: -4.3415, G/loss_rec: 0.5538, G/loss_cls: 3.3516\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.507487157049326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2114596366882324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4217722415924072}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5507781505584717}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2502453327178955}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.983329176902771}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 77, "power": 368.945}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.34861032783197, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.389871835708618}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.196514129638672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.721445228563542, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0577573776245117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.667776346206665}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.360865592956543}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:44], Iteration [100/200000], D/loss_real: -2.1996, D/loss_fake: 1.3916, D/loss_cls: 3.3000, D/loss_gp: 0.0869, G/loss_fake: -1.3780, G/loss_rec: 0.5768, G/loss_cls: 3.3806\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.83059187586123, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.132774829864502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1603212356567383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7224862575531006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.98, "temperature": 78, "power": 209.342}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6672191619873047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.721295118331909}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.3705876718548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4580485820770264}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0790822505950928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.789347209719587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9940168857574463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9327802658081055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5489604473114014}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:47], Iteration [110/200000], D/loss_real: -5.2043, D/loss_fake: 4.3399, D/loss_cls: 3.2522, D/loss_gp: 0.0161, G/loss_fake: -3.4171, G/loss_rec: 0.5713, G/loss_cls: 3.4308", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.06869220986059, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1736793518066406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 300.578}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9872524738311768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7024011611938477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.045640468597412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2699198722839355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.152547311039434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.930947780609131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.991608738899231}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.867702753537163, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7082041501998901}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5778729915618896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1663765907287598}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:51], Iteration [120/200000], D/loss_real: -2.5564, D/loss_fake: 1.0542, D/loss_cls: 3.2038, D/loss_gp: 0.0465, G/loss_fake: -2.4015, G/loss_rec: 0.5582, G/loss_cls: 3.3508\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.38467153484138, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.566779613494873}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 78, "power": 281.405}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.849420547485352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.100782632827759}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.943386077880859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.957638263702393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.237947984535445, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.743450164794922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.329449653625488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.856155305956968, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.75643539428711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.2974271774292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.21634578704834}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:54], Iteration [130/200000], D/loss_real: -0.2338, D/loss_fake: 0.1474, D/loss_cls: 3.4953, D/loss_gp: 0.6808, G/loss_fake: -0.1021, G/loss_rec: 0.5332, G/loss_cls: 3.4595\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.98, "temperature": 77, "power": 350.66}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.949590570421236, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.263629913330078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.704811096191406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.049165725708008}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.001199722290039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 20.01687240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.505042943657934, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.680143356323242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.48331356048584}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.890739919507194, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.309514999389648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.884629249572754}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 77, "power": 161.811}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.30716323852539}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:58], Iteration [140/200000], D/loss_real: 0.1322, D/loss_fake: -0.1191, D/loss_cls: 3.3466, D/loss_gp: 0.7947, G/loss_fake: 0.1972, G/loss_rec: 0.5142, G/loss_cls: 3.3484\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 43.9498588585992, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.482892990112305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.06439208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.64170503616333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9685356616973877}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.367649078369141}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.37724130191398, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.470202922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 78, "power": 305.085}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5152342319488525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.949879568365297, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2401461601257324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1975464820861816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.218050956726074}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:01], Iteration [150/200000], D/loss_real: -1.2048, D/loss_fake: 0.3205, D/loss_cls: 3.3487, D/loss_gp: 0.1754, G/loss_fake: 0.2283, G/loss_rec: 0.5049, G/loss_cls: 3.3464\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.43206005989796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0931990146636963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.79805326461792}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6309213638305664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.41560435295105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.254915237426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.997666049807904, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 229.605}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.607158660888672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4929704666137695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.827416051706482, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.434049606323242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.508437633514404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6448564529418945}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:05], Iteration [160/200000], D/loss_real: -0.7107, D/loss_fake: 0.3651, D/loss_cls: 3.3345, D/loss_gp: 0.2656, G/loss_fake: -1.2273, G/loss_rec: 0.5132, G/loss_cls: 3.3570\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.57161709710732, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5689890384674072}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.807031631469727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.7881133556365967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.6615917682647705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1059515476226807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.99, "temperature": 77, "power": 164.373}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.18596391046428, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.761859893798828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.50831937789917}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.774190096082005, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4266552925109863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2906341552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1533851623535156}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:08], Iteration [170/200000], D/loss_real: -2.6321, D/loss_fake: 2.4288, D/loss_cls: 3.3430, D/loss_gp: 0.0014, G/loss_fake: -2.3668, G/loss_rec: 0.5078, G/loss_cls: 3.3512\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.10309255185025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.6296677589416504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3898794651031494}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.91, "temperature": 78, "power": 349.589}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0889828205108643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8158233165740967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5107460021972656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.27341524044557, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5628106594085693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2683067321777344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.954062714644607, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0418951511383057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7302507162094116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5594336986541748}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:12], Iteration [180/200000], D/loss_real: -3.0206, D/loss_fake: 1.0339, D/loss_cls: 3.3110, D/loss_gp: 0.0235, G/loss_fake: -0.6357, G/loss_rec: 0.5053, G/loss_cls: 3.3933\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.15355391589466, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3772342205047607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 287.055}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.388420581817627}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4431064128875732}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.605264186859131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3124754428863525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.34187725289228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.582227945327759}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6479787826538086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.840951018318734, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7377052307128906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.453064441680908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4794130325317383}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:16], Iteration [190/200000], D/loss_real: -1.7670, D/loss_fake: 0.7271, D/loss_cls: 3.3752, D/loss_gp: 0.0144, G/loss_fake: -0.2988, G/loss_rec: 0.4972, G/loss_cls: 3.3753\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.185541971868425, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.106503486633301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 78, "power": 309.893}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.14693546295166}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9078567028045654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.977015256881714}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.739900588989258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.30069066735387, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1128323078155518}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.965388059616089}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.856631387264535, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1346616744995117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.836603164672852}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.476594924926758}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:19], Iteration [200/200000], D/loss_real: -0.1541, D/loss_fake: -0.0009, D/loss_cls: 3.2550, D/loss_gp: 0.1377, G/loss_fake: -1.0008, G/loss_rec: 0.4956, G/loss_cls: 3.3767\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.4359290694734, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 79, "power": 317.084}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 79, "power": 317.084}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712078980.6923656, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/super-slomo.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/super-slomo.D0.data new file mode 100644 index 000000000..8b493eb4e --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/super-slomo.D0.data @@ -0,0 +1,391 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 74, "power": 107.427, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078983.331632, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712078983.3483179}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1472.375, 81920.0], "load": 0, "temperature": 70, "power": 96.48}}}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 328.47808837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.46881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 74, "power": 285.378}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4638977050781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4608459472656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4584655761719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4568786621094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4557800292969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.81, "temperature": 74, "power": 280.3}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45513916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45465087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4543762207031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.23652711456088, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4541320800781}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.8, "temperature": 75, "power": 324.06}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.224495820542586, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45367431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45343017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.36284911916846, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45318603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 190.737}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.261178450602266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.967581505926894, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.45233154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4519958496094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.73784474584244, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4516906738281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 76, "power": 279.304}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.451416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.594516605978924, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4512023925781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.28498117798238, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4510803222656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4508972167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.10012373306887, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 283.518}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4505920410156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4503479003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.86162470478901, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4500427246094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4498291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.27301512778587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4496154785156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 77, "power": 254.367}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.372754421091535, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.44940185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4490966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.05545757018502, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.44879150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4484558105469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.02176004256121, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 75, "power": 167.074}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.44818115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4478454589844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.77278801272933, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.447509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.070490218961865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4467468261719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 241.14}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.240082608292994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.446044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.139318454450525, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.445556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.10544352442072, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4450988769531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.218}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4447326660156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.97631533159207, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4442138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4437255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.25052577462, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4432373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.44268798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.807311462365455, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 102.077}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.44256591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 35.98783215856734, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4415283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4410095214844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.26664140773931, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.86, "temperature": 77, "power": 265.928}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.62171037120563, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4397888183594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4390563964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.871864406161436, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.43829345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.927191217804975, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4366455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 278.754}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4357604980469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.766062855286634, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.057952936916436, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4338073730469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4327697753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.72634373080457, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 143.281}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.430419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.02601584300039, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.42913818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.15053158548995, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.88, "temperature": 78, "power": 248.171}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.84231553301942, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.42498779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4234924316406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.403488232377484, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.93665569757023, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.42022705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.997}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4185485839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.889177465296854, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4167175292969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4147644042969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.84517059816774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4128112792969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.512036983718424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4107360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 121.342}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4085388183594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.8865406947121, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4062194824219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.40386962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.11664097539493, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4015808105469}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 283.827}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3990783691406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.22254067513172, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3963317871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 31.92621016364475, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.39337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.22786114018701, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3876647949219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 274.432}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.38421630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.730715629739734, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.381103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.120471910127705, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.37744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3735046386719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.69360569573131, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 281.407}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.36956787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3650207519531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.11270386926677, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.36029052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 33.29918068770727, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3497314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 142.116}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.90459676620552, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.34381103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.53086839718483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3302917480469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.969881187928614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32183837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.94, "temperature": 78, "power": 203.124}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3136901855469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.05122658993147, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3064270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.406025870569266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30010986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2879943847656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.13451305722688, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 79, "power": 241.535}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28253173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.708231471785176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2642517089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25408935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.47671511193946, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.24456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 78, "power": 354.791}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.97211604164333, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2336120605469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.81424912080379, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.212646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2038879394531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.0980645550324, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 246.402}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19232177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1824035644531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.88736886730239, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1700439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.231083708737366, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 78, "power": 249.297}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712079066.8635077, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/t5.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/t5.D0.data new file mode 100644 index 000000000..e1c189b27 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/t5.D0.data @@ -0,0 +1,590 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 104.497, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078035.439823, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712078035.4563587}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.970378875732422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.800580024719238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.727293014526367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.682456970214844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 301.633}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.654926300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.628446578979492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.61573600769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.587346076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.556131362915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.513788223266602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.457639694213867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.321690559387207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.367353439331055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.313187599182129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 300.274}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.28883171081543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.212865829467773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.028278350830078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.84654426574707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.015433311462402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.654990196228027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.513398761524286, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.576047897338867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.431913375854492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35800552368164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.81}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.161618009236065, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.306499481201172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.0736665725708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.939047813415527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.910099029541016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.8499263906401, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.83672046661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.653719902038574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.481781959533691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.3078586854502, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.48547649383545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.421192169189453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 306.632}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.146132469177246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.73386027344551, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.017644882202148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.908407211303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.934696197509766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.747555732727051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.01896724494396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.655747413635254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.503643035888672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.347489356994629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.346884981362464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.31602668762207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 226.302}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231689929962158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05312442779541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.297648020757606, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9287261962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817322731018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.710197448730469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.77194944243633, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.60150671005249}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.518242359161377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.411893844604492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.325069427490234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.702023231180064, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.233058929443359}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 75, "power": 232.549}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.172609329223633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.097485542297363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.47617582463295, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.041872501373291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.932075023651123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.845073699951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.08195618693227, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.775830268859863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.705239295959473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.633525848388672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.575502395629883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.66782433496777, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 324.204}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5167365074157715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.423802852630615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.354887962341309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.27502777648228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.272907257080078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.22577428817749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.148532390594482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.302127354542854, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.052487373352051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.000744819641113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.897304534912109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 323.043}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.836282253265381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.4645535736731, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.77700138092041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.716606140136719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.660195827484131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.298838210354056, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.5612311363220215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.502846717834473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.482702732086182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.21506665591461, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.492550373077393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.360192775726318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 292.325}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2910284996032715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2590203285217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.78574846333013, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.185894012451172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.096889972686768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.042828559875488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.32153184709173, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9600980281829834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8612828254699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.813199996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.48145742501573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.774353265762329}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.680110216140747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 285.975}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.641136646270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.08007362170554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5967588424682617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.522066354751587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.439159870147705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3564889430999756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.58709495158289, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3717949390411377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.323021650314331}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2349772453308105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.32145139087524, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.192401647567749}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 230.536}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1241087913513184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0823092460632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.79780897923113, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.988307476043701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9341442584991455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8254554271698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.76162052154541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.72196650215915, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.763589859008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7407455444335938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.672053337097168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.68777841024662, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.99, "temperature": 76, "power": 304.881}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.585357189178467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5391626358032227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4440035820007324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.65403305570211, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3648884296417236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2605745792388916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2033910751342773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.074898776610475, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1607017517089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.115994691848755}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.066122055053711}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 284.816}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.993964672088623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.32041072510573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0019028186798096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9345535039901733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8910869359970093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.86588558414641, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.861147165298462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8138902187347412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7461824417114258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.112481089674006, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6692562103271484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.625261664390564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5834534168243408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 298.731}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.0665671794488, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5272022485733032}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.459859013557434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4157166481018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3812469244003296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.158964098619705, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3119561672210693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2564345598220825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2277307510375977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.989731441401226, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1866565942764282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1411606073379517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.815}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.083279013633728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.16440041655849, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0341026782989502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9987545013427734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9641054272651672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.63416494507367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9365309476852417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8940635919570923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8575931787490845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8317781686782837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.94255042174156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7994922995567322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 304.949}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7648830413818359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7411973476409912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.168463253269245, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7056658267974854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6834872364997864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6573166847229004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.28341553759186, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6341385245323181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6199974417686462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5888203978538513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.530540233582144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5731779932975769}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 242.951}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5541250109672546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.536582350730896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5135989785194397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.823796543761894, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4957291781902313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47861233353614807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47117674350738525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.206086864876305, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4504289925098419}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.43357139825820923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4255506992340088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.78724376210535, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.41500476002693176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 309.84}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4001453220844269}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3856187164783478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.37681058049201965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.42934239142913, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3642743229866028}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3536361753940582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.34521564841270447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.34800795854977, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3393672704696655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3293442130088806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.32399898767471313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 311.488}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.0730700556139, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3113195300102234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3049860894680023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2919404208660126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.28530269861221313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.52107161537166, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2786383628845215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.27610689401626587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2684989273548126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.92595580884994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2630639374256134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2509020268917084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.596}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24854587018489838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.96482064926789, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24656544625759125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23619654774665833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23337531089782715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.993269944999156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22808784246444702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22517065703868866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21650603413581848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21727493405342102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.498195881220965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21555562317371368}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 285.4}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20680122077465057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20380568504333496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.982066780323784, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2012479454278946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1972467303276062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1940830945968628}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.91166107588888, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1896078884601593}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18643631041049957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18215446174144745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.89319703333237, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17853213846683502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17645688354969025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.788}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1725698709487915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16997073590755463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.55208540216636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16654980182647705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1624709963798523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16435472667217255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.85558406180044, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16150528192520142}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15788640081882477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15364141762256622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.054422957401265, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15300226211547852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 77, "power": 301.966}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1467810720205307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14508351683616638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.017092172493605, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14274908602237701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14380408823490143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14112578332424164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1418134719133377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.27492552184506, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13803264498710632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13522985577583313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13362157344818115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.10779637223394, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 279.531}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 77, "power": 297.292}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712078110.2958682, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/tf32.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/tf32.D0.data new file mode 100644 index 000000000..7b743b550 --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/tf32.D0.data @@ -0,0 +1,124 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 99.705, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712077285.652197, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712077285.6617703}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 118.32754788274686, "units": "Tflops", "t": 1712077287.5160294}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 66, "power": 60.775}}, "t": 1712077286.950776}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0, "temperature": 70, "power": 327.388}}, "t": 1712077287.4610522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.35934213074535, "units": "Tflops", "t": 1712077287.6792314}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.88899180073525, "units": "Tflops", "t": 1712077287.842325}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.8939237565598, "units": "Tflops", "t": 1712077288.0054052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 306.901}}, "t": 1712077287.9707563}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.95063700383807, "units": "Tflops", "t": 1712077288.1696808}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 129.64735369900728, "units": "Tflops", "t": 1712077288.3393598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.00863381296466, "units": "Tflops", "t": 1712077288.5035186}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 301.94}}, "t": 1712077288.4834569}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.43246021788087, "units": "Tflops", "t": 1712077288.668445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.787619985593, "units": "Tflops", "t": 1712077288.8328652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.66566579938606, "units": "Tflops", "t": 1712077288.9974337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.924}}, "t": 1712077288.9936674}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.46489777251216, "units": "Tflops", "t": 1712077289.1623187}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.11288171634845, "units": "Tflops", "t": 1712077289.3263476}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.35490575102946, "units": "Tflops", "t": 1712077289.4913075}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.88958056283872, "units": "Tflops", "t": 1712077289.655615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.197}}, "t": 1712077289.5035207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.55959745279375, "units": "Tflops", "t": 1712077289.820384}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.61919178143685, "units": "Tflops", "t": 1712077289.9850082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.07681503516815, "units": "Tflops", "t": 1712077290.1490693}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.539}}, "t": 1712077290.0134768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.46412526650184, "units": "Tflops", "t": 1712077290.3139725}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.46258028130885, "units": "Tflops", "t": 1712077290.4787962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.0678501148285, "units": "Tflops", "t": 1712077290.642876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.798}}, "t": 1712077290.5233986}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.79984705472003, "units": "Tflops", "t": 1712077290.8073256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.39097086508684, "units": "Tflops", "t": 1712077290.9722438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2784043196295, "units": "Tflops", "t": 1712077291.1372902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.909}}, "t": 1712077291.0319536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23142934316905, "units": "Tflops", "t": 1712077291.3024626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.28803442913426, "units": "Tflops", "t": 1712077291.4674945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.49290715438087, "units": "Tflops", "t": 1712077291.6322744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 296.591}}, "t": 1712077291.541029}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2689681623087, "units": "Tflops", "t": 1712077291.7973952}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.29015323998885, "units": "Tflops", "t": 1712077291.9624372}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.25949242926333, "units": "Tflops", "t": 1712077292.1262753}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.483}}, "t": 1712077292.049293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.29920709977563, "units": "Tflops", "t": 1712077292.2913465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24855982153497, "units": "Tflops", "t": 1712077292.4564307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.4604559850726, "units": "Tflops", "t": 1712077292.621264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.898}}, "t": 1712077292.5587559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.21103249702153, "units": "Tflops", "t": 1712077292.7864578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.18891090514808, "units": "Tflops", "t": 1712077292.9516141}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.19852808572966, "units": "Tflops", "t": 1712077293.116764}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.304}}, "t": 1712077293.0668132}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2418225910361, "units": "Tflops", "t": 1712077293.281912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23008218901032, "units": "Tflops", "t": 1712077293.4470382}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23393127317235, "units": "Tflops", "t": 1712077293.6121461}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.324}}, "t": 1712077293.577403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.22007561093127, "units": "Tflops", "t": 1712077293.7773101}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.22238468784028, "units": "Tflops", "t": 1712077293.9424372}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24913733013733, "units": "Tflops", "t": 1712077294.1075234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.676}}, "t": 1712077294.0850124}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2308519880508, "units": "Tflops", "t": 1712077294.27268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2412451458368, "units": "Tflops", "t": 1712077294.4377837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.26184378600908, "units": "Tflops", "t": 1712077294.6028519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.978}}, "t": 1712077294.5958502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.27281949370183, "units": "Tflops", "t": 1712077294.7679858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.25202494823964, "units": "Tflops", "t": 1712077294.9330666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23393127317235, "units": "Tflops", "t": 1712077295.0981672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.25106239496628, "units": "Tflops", "t": 1712077295.2632587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.17}}, "t": 1712077295.1029696}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.22835017369363, "units": "Tflops", "t": 1712077295.428415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.1900648934909, "units": "Tflops", "t": 1712077295.5935786}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.20910858862644, "units": "Tflops", "t": 1712077295.7587082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.854}}, "t": 1712077295.6141336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.16718119210032, "units": "Tflops", "t": 1712077295.9239538}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.22873506209447, "units": "Tflops", "t": 1712077296.089061}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.21314886044416, "units": "Tflops", "t": 1712077296.2541945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.372}}, "t": 1712077296.1212466}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.18141046831494, "units": "Tflops", "t": 1712077296.4194238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.18852624681085, "units": "Tflops", "t": 1712077296.584579}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2208452943404, "units": "Tflops", "t": 1712077296.7497036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.359}}, "t": 1712077296.6309273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.27204920961543, "units": "Tflops", "t": 1712077296.9148002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.21776661406503, "units": "Tflops", "t": 1712077297.079921}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23008218901032, "units": "Tflops", "t": 1712077297.2450335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.646}}, "t": 1712077297.1380339}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2437474445157, "units": "Tflops", "t": 1712077297.4101856}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.26781280629592, "units": "Tflops", "t": 1712077297.5752518}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2466348290137, "units": "Tflops", "t": 1712077297.7403445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.528}}, "t": 1712077297.6469982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2198831914686, "units": "Tflops", "t": 1712077297.9055262}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24528736734194, "units": "Tflops", "t": 1712077298.070622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.25857067930923, "units": "Tflops", "t": 1712077298.2357001}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 295.815}}, "t": 1712077298.1541488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2225771145292, "units": "Tflops", "t": 1712077298.400884}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.25009985559896, "units": "Tflops", "t": 1712077298.5659637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.26685002492098, "units": "Tflops", "t": 1712077298.7310297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.099}}, "t": 1712077298.663185}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.21391846381613, "units": "Tflops", "t": 1712077298.896213}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24855982153497, "units": "Tflops", "t": 1712077299.0613177}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2266182034094, "units": "Tflops", "t": 1712077299.2264338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.978}}, "t": 1712077299.1703143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.22565601715377, "units": "Tflops", "t": 1712077299.3915896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2547201713875, "units": "Tflops", "t": 1712077299.5566738}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2787894972917, "units": "Tflops", "t": 1712077299.7217174}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.552}}, "t": 1712077299.6812122}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24740481934785, "units": "Tflops", "t": 1712077299.88685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.21853627079372, "units": "Tflops", "t": 1712077300.0519881}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24875232384622, "units": "Tflops", "t": 1712077300.2170668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.442}}, "t": 1712077300.188352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23720316957952, "units": "Tflops", "t": 1712077300.3822114}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.18833391847545, "units": "Tflops", "t": 1712077300.5473757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.20814665526834, "units": "Tflops", "t": 1712077300.7125075}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.138}}, "t": 1712077300.6968179}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2035295685289, "units": "Tflops", "t": 1712077300.8777108}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23701070063657, "units": "Tflops", "t": 1712077301.0428088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.23662576441882, "units": "Tflops", "t": 1712077301.2079144}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 299.509}}, "t": 1712077301.203931}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.27320463908413, "units": "Tflops", "t": 1712077301.373013}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.27994504363647, "units": "Tflops", "t": 1712077301.5380626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2541426143916, "units": "Tflops", "t": 1712077301.703145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2866861300266, "units": "Tflops", "t": 1712077301.868178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.825}}, "t": 1712077301.711091}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.25144741460687, "units": "Tflops", "t": 1712077302.0333116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.3003630001962, "units": "Tflops", "t": 1712077302.1983297}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712077302.660153, "return_code": 0}, "pipe": null} diff --git a/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/whisper.D0.data b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/whisper.D0.data new file mode 100644 index 000000000..458e3853a --- /dev/null +++ b/paice-v1-10-g1243bba/NVIDIA_A100_80GB_PCIe/majaguma.2024-04-02_16:55:21.895752/whisper.D0.data @@ -0,0 +1,684 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "trustingspider.eastus2.cloudapp.azure.com", "ip": "trustingspider.eastus2.cloudapp.azure.com", "ipaddrlist": ["127.0.0.1", "10.0.1.4", "::1", "fe80::6245:bdff:fec0:7ec%eth0", "60:45:bd:c0:07:ec", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.trustingspider.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "majaguma.2024-04-02_16:55:21.895752", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "b7a983fcf74c005cfc04d84913c69872", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "trustingspider", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-b19ad74c-adf3-683a-1bdd-0ce516ef4aae": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 105.874, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712078191.204879, "milabench": {"tag": "paice-v1-10-g1243bba", "commit": "1243bbae76bfc8105d553472dcfce834b54a723e", "date": "2024-04-02 12:02:23 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712078191.221243}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 69, "power": 94.767}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 68, "power": 93.365}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6992721557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4653310775756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6924057006835938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 72, "power": 301.855}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7043609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6987991333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6994171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6975479125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6967544555664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6952743530273438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6956329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 73, "power": 144.457}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6933135986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918106079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918716430664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938247680664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926803588867188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926116943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 234.2913916211188, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917190551757812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 73, "power": 330.102}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.04746175148657, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915664672851562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919631958007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.72370533201257, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913375854492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.85477238350038, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691558837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 74, "power": 338.011}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.6530419972232, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691375732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69146728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.90345245547238, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913833618164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912765502929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.8453820003199, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 74, "power": 292.171}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.11544837275375, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.35952419259772, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.98, "temperature": 75, "power": 298.153}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.56933978331955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.5706106402332, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.1818606460464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 74, "power": 332.135}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.31044024012843, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.40257392467467, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.95571962941324, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 75, "power": 280.408}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.70580108910144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.36098047474337, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.20621401150512, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 320.074}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.72205777988853, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.87064656556868, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 75, "power": 323.693}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.08866226770925, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.04680735114928, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.0307817080435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 272.571}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.37628234333806, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.3267088734617, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.32214067767083, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 76, "power": 157.032}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.30861548038735, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.16081136691471, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.16926617279907, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 76, "power": 257.02}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.47607760304805, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.66688120832552, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 77, "power": 336.298}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.50669730170878, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.16736779342986, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909866333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.19906234053013, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908340454101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.93, "temperature": 75, "power": 339.327}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919326782226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.9279660816675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914596557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910934448242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914291381835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.10770201189231, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69134521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.52691945922763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910018920898438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.98, "temperature": 77, "power": 307.795}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.18414582451112, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.57904893467582, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.22158187895425, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909942626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 76, "power": 283.237}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.49812755637112, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906585693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905288696289062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909103393554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.6168404494011, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955337524414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7002067565917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.697906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913986206054688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 76, "power": 281.872}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.1268088992443, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6951828002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938323974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6948165893554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.14551093007182, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923904418945312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693206787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.4679434605734, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6921310424804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 76, "power": 273.272}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915359497070312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.3693376309808, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6916046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6903610229492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.676335311893, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897354125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897735595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6895599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6880340576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.74458646748022, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6874923706054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6850967407226562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 77, "power": 286.651}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6821975708007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.67584228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.33226381498915, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6980819702148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.832855224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7104606628417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.708587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.93003322162104, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [219, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919479370117188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [220, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6960067749023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [221, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6996231079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [222, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6961898803710938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.1493433197456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [223, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 77, "power": 278.47}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [224, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [225, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6962814331054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [226, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.7189868666101, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [227, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918563842773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [228, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [229, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693878173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [230, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6942672729492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.4194120149196, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [231, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923370361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [232, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [233, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [234, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6931381225585938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 77, "power": 295.422}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.5700406497057, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [235, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926040649414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [236, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [237, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [238, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917572021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.9245137781287, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [239, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69232177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [240, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [241, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [242, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.7145527116028, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [243, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [244, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [245, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 77, "power": 281.275}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [246, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913909912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.4835698946194, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [247, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [248, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [249, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914443969726562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [250, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.8765232032213, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [251, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913299560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [252, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [253, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [254, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.67232322291449, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [255, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [256, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 307.366}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [257, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [258, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.40894191513954, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [259, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [260, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [261, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [262, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.38790475698414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.96, "temperature": 77, "power": 309.046}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712078273.8594427, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/README.md b/paice-v1-11-g010135f/NVIDIA_A10-24Q/README.md new file mode 100644 index 000000000..48fc2f8e1 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/README.md @@ -0,0 +1,38 @@ +``` +================= +Benchmark results +================= + fail n perf sem% std% peak_memory score weight +bert-fp16 4 4 NaN NaN NaN 24000 NaN 0.00 +bert-fp32 4 4 NaN NaN NaN 23304 NaN 0.00 +bert-tf32 4 4 NaN NaN NaN 23304 NaN 0.00 +bert-tf32-fp16 4 4 NaN NaN NaN 24000 NaN 3.00 +bf16 0 4 91.87 0.1% 1.4% 3098 183.777391 0.00 +convnext_large-fp16 4 4 NaN NaN NaN 24394 NaN 0.00 +convnext_large-fp32 4 4 NaN NaN NaN 24430 NaN 0.00 +convnext_large-tf32 4 4 NaN NaN NaN 24430 NaN 0.00 +convnext_large-tf32-fp16 4 4 NaN NaN NaN 24470 NaN 3.00 +davit_large 4 4 NaN NaN NaN 24438 NaN 1.00 +davit_large-multi 2 2 NaN NaN NaN 24366 NaN 5.00 +dlrm 0 2 376081.29 0.1% 1.4% 5996 376081.290012 1.00 +focalnet 0 4 146.78 1.0% 15.0% 24468 293.712272 2.00 +fp16 0 4 92.92 0.1% 1.1% 3098 185.826273 0.00 +fp32 0 4 15.61 0.1% 1.4% 3476 31.219423 0.00 +llama 4 4 NaN NaN NaN -1 NaN 1.00 +reformer 4 4 NaN NaN NaN 23556 NaN 1.00 +regnet_y_128gf 4 4 NaN NaN NaN 24450 NaN 2.00 +resnet152 4 4 NaN NaN NaN 24458 NaN 1.00 +resnet152-multi 2 2 NaN NaN NaN 24470 NaN 5.00 +resnet50 0 4 546.80 0.5% 8.1% 5838 1094.496142 1.00 +rwkv 4 4 NaN NaN NaN 3976 NaN 1.00 +stargan 4 4 NaN NaN NaN 24384 NaN 1.00 +super-slomo 4 4 NaN NaN NaN 24458 NaN 1.00 +t5 4 4 NaN NaN NaN 24098 NaN 2.00 +tf32 0 4 44.61 0.1% 1.0% 3476 89.225443 0.00 +whisper 4 4 NaN NaN NaN 23124 NaN 1.00 + +Scores +------ +Failure rate: 74.51% (FAIL) +Score: 2.65 +``` diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/badge.svg b/paice-v1-11-g010135f/NVIDIA_A10-24Q/badge.svg new file mode 100644 index 000000000..726e6f405 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/badge.svg @@ -0,0 +1 @@ +NVIDIA_A10-24QNVIDIA_A10-24Qpartialpartial \ No newline at end of file diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D0.data new file mode 100644 index 000000000..96b27a461 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D0.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255816.478905, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255818.8367445}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23999.8125, 24512.0], "load": 0.17, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255823.3500848, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D1.data new file mode 100644 index 000000000..7d76cc6e2 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp16.D1.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255818.821121, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255818.8442266}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23999.8125, 24512.0], "load": 0.15, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255823.0727057, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D0.data new file mode 100644 index 000000000..ff3448ff8 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D0.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255807.772349, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255810.1287518}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255814.0971403, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D1.data new file mode 100644 index 000000000..3ff463818 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-fp32.D1.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.12, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255810.113409, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255810.1362917}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255813.9872866, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D0.data new file mode 100644 index 000000000..bf98cee01 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D0.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255834.408576, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255836.796411}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23999.8125, 24512.0], "load": 0.19, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255841.190781, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D1.data new file mode 100644 index 000000000..ab237f264 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32-fp16.D1.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.04, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.11, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255836.779172, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255836.8041034}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23999.8125, 24512.0], "load": 0.15, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 512.19 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255841.1329126, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D0.data new file mode 100644 index 000000000..ceb0c2436 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D0.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255825.712579, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255828.0542758}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255831.7830172, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D1.data new file mode 100644 index 000000000..43fd61b5f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bert-tf32.D1.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255828.038916, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255828.0615072}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23323.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.16 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712255832.0014517, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D0.data new file mode 100644 index 000000000..164ca0573 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D0.data @@ -0,0 +1,137 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.04, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255466.590297, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255468.905043}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 84.31733446984312, "units": "Tflops", "t": 1712255470.4362257}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255470.0362403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.50631369394608, "units": "Tflops", "t": 1712255470.6695824}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.09, "temperature": null, "power": null}}, "t": 1712255470.5420935}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.51009040606831, "units": "Tflops", "t": 1712255470.9023838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.24605109321959, "units": "Tflops", "t": 1712255471.146118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.22, "temperature": null, "power": null}}, "t": 1712255471.0473926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.38217852944457, "units": "Tflops", "t": 1712255471.384261}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.51280207785311, "units": "Tflops", "t": 1712255471.6169817}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.42, "temperature": null, "power": null}}, "t": 1712255471.5526612}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.06555294663043, "units": "Tflops", "t": 1712255471.8508568}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.34430091058142, "units": "Tflops", "t": 1712255472.0916586}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.55, "temperature": null, "power": null}}, "t": 1712255472.058135}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.02050884954336, "units": "Tflops", "t": 1712255472.3309634}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.65265169436914, "units": "Tflops", "t": 1712255472.565822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.75, "temperature": null, "power": null}}, "t": 1712255472.5634499}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.99997999260887, "units": "Tflops", "t": 1712255472.799877}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.55228533466969, "units": "Tflops", "t": 1712255473.0375352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.4306846651339, "units": "Tflops", "t": 1712255473.278101}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}, "t": 1712255473.0687656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.46464980649935, "units": "Tflops", "t": 1712255473.5134919}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.83167377290025, "units": "Tflops", "t": 1712255473.7478988}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255473.574011}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.20449154472807, "units": "Tflops", "t": 1712255473.986491}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.6424433242199, "units": "Tflops", "t": 1712255474.2264953}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.0793128}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.22865659169432, "units": "Tflops", "t": 1712255474.4625025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.54920935043356, "units": "Tflops", "t": 1712255474.6976256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.584662}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.01711047295095, "units": "Tflops", "t": 1712255474.93414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.86543185225207, "units": "Tflops", "t": 1712255475.1735651}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.0900052}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.90645601656371, "units": "Tflops", "t": 1712255475.4103608}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.47573242528249, "units": "Tflops", "t": 1712255475.6456726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.5955245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.10687665784504, "units": "Tflops", "t": 1712255475.88196}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.83460716580517, "units": "Tflops", "t": 1712255476.1214762}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.100936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.48787705469407, "units": "Tflops", "t": 1712255476.359656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.38449753163003, "units": "Tflops", "t": 1712255476.5951912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5363141157061, "units": "Tflops", "t": 1712255476.832881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.6061387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.20412284511447, "units": "Tflops", "t": 1712255477.0714762}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.38930395641036, "units": "Tflops", "t": 1712255477.309551}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.1116827}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.35415696038632, "units": "Tflops", "t": 1712255477.5452068}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.48750608524567, "units": "Tflops", "t": 1712255477.783023}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.6170638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.37745969583447, "units": "Tflops", "t": 1712255478.0211692}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.07360437054737, "units": "Tflops", "t": 1712255478.2600505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.1223423}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.39140016499302, "units": "Tflops", "t": 1712255478.495614}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41948353044396, "units": "Tflops", "t": 1712255478.7336192}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.6275706}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.40976096318262, "units": "Tflops", "t": 1712255478.9718282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42605899732119, "units": "Tflops", "t": 1712255479.2098012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.1328607}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.79923893059077, "units": "Tflops", "t": 1712255479.4468596}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.4768419861293, "units": "Tflops", "t": 1712255479.6847122}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.638394}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42078002732306, "units": "Tflops", "t": 1712255479.9227474}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.28826273182227, "units": "Tflops", "t": 1712255480.1610806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.1437087}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.51904911602156, "units": "Tflops", "t": 1712255480.398864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42642947400104, "units": "Tflops", "t": 1712255480.636837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5540499353741, "units": "Tflops", "t": 1712255480.874483}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.6490898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.23003116730106, "units": "Tflops", "t": 1712255481.1130168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41716844791688, "units": "Tflops", "t": 1712255481.351014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.1544216}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.50726434922842, "units": "Tflops", "t": 1712255481.5888412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.56677562725463, "units": "Tflops", "t": 1712255481.8264527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.659704}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.11130444494044, "units": "Tflops", "t": 1712255482.0652885}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.28613889544037, "units": "Tflops", "t": 1712255482.3036225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.165049}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42522543565042, "units": "Tflops", "t": 1712255482.5416481}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.4191131094453, "units": "Tflops", "t": 1712255482.7796466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.6703403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.50819217473018, "units": "Tflops", "t": 1712255483.0174541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41050165821994, "units": "Tflops", "t": 1712255483.255465}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.1756403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42300267803238, "units": "Tflops", "t": 1712255483.4934871}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41679804547584, "units": "Tflops", "t": 1712255483.731491}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.6809924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.4136497446005, "units": "Tflops", "t": 1712255483.9695513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.2733056166573, "units": "Tflops", "t": 1712255484.208019}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.186317}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.4243918890148, "units": "Tflops", "t": 1712255484.4460564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41957613615757, "units": "Tflops", "t": 1712255484.6840491}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.24995636086726, "units": "Tflops", "t": 1712255484.9224758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.6917884}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41855748351477, "units": "Tflops", "t": 1712255485.1605139}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42642947400104, "units": "Tflops", "t": 1712255485.3984852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.1970625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41809446700944, "units": "Tflops", "t": 1712255485.636522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.68714185141376, "units": "Tflops", "t": 1712255485.87906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.7023659}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.43245013634089, "units": "Tflops", "t": 1712255486.1170676}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.37884753803, "units": "Tflops", "t": 1712255486.3551602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.2078986}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42661471345474, "units": "Tflops", "t": 1712255486.593173}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.71853734428491, "units": "Tflops", "t": 1712255486.8329842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.7132907}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41976134814156, "units": "Tflops", "t": 1712255487.0710182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.40790927753591, "units": "Tflops", "t": 1712255487.309043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.219902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.35821924809419, "units": "Tflops", "t": 1712255487.5473814}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37805624982441, "units": "Tflops", "t": 1712255487.7880902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.7251945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.38828597097721, "units": "Tflops", "t": 1712255488.0262096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.29962229886915, "units": "Tflops", "t": 1712255488.2645056}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.2305722}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.39892965477048, "units": "Tflops", "t": 1712255488.5025978}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37850890371786, "units": "Tflops", "t": 1712255488.7433012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.7359383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.57047541511608, "units": "Tflops", "t": 1712255488.9835434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.02546673732121, "units": "Tflops", "t": 1712255489.2225525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42689257402746, "units": "Tflops", "t": 1712255489.4605222}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.2412555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.79489112557708, "units": "Tflops", "t": 1712255489.7055163}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.38541722362895, "units": "Tflops", "t": 1712255489.9435904}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.746608}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.29066370805562, "units": "Tflops", "t": 1712255490.181972}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.28733931268843, "units": "Tflops", "t": 1712255490.4202974}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.2520156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.55084263334564, "units": "Tflops", "t": 1712255490.66059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.75950723515646, "units": "Tflops", "t": 1712255490.9004068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.7572982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.17868969080234, "units": "Tflops", "t": 1712255491.13908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.32734029160343, "units": "Tflops", "t": 1712255491.3773057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255491.262598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38249245136332, "units": "Tflops", "t": 1712255491.6180596}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255492.1812196, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D1.data new file mode 100644 index 000000000..1884083d8 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/bf16.D1.data @@ -0,0 +1,137 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.04, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255468.896415, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255468.905882}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 84.43713323423174, "units": "Tflops", "t": 1712255470.4139261}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255470.0124965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.42324224043428, "units": "Tflops", "t": 1712255470.6449673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}, "t": 1712255470.5186343}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.7306303225906, "units": "Tflops", "t": 1712255470.8772168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.63171418855094, "units": "Tflops", "t": 1712255471.1199105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712255471.0245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.7691908690281, "units": "Tflops", "t": 1712255471.3547773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.44853859868697, "units": "Tflops", "t": 1712255471.5876806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.43, "temperature": null, "power": null}}, "t": 1712255471.5298777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.41637163342693, "units": "Tflops", "t": 1712255471.8232083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.82948696691938, "units": "Tflops", "t": 1712255472.0627613}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712255472.0355153}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.33889436923431, "units": "Tflops", "t": 1712255472.3015506}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.02460705920443, "units": "Tflops", "t": 1712255472.535485}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.49556624211912, "units": "Tflops", "t": 1712255472.76837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712255472.5411787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.96676329714575, "units": "Tflops", "t": 1712255473.0052345}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.89014312340933, "units": "Tflops", "t": 1712255473.2446036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712255473.0466676}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.5119350081745, "units": "Tflops", "t": 1712255473.4800181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.5064105289479, "units": "Tflops", "t": 1712255473.7127585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255473.552015}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.18994236717737, "units": "Tflops", "t": 1712255473.9488537}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.94217174195401, "units": "Tflops", "t": 1712255474.188145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.0578268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.89868918573929, "units": "Tflops", "t": 1712255474.4251978}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.57482560148696, "units": "Tflops", "t": 1712255474.658213}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255474.5634713}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.68556248566537, "units": "Tflops", "t": 1712255474.8958733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.81422119192186, "units": "Tflops", "t": 1712255475.135553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.069129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.98841433932571, "units": "Tflops", "t": 1712255475.372179}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.0623872388584, "units": "Tflops", "t": 1712255475.6061213}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255475.574926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.21716142154506, "units": "Tflops", "t": 1712255475.8421662}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.08472727980903, "units": "Tflops", "t": 1712255476.0810685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.0806623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5115325006472, "units": "Tflops", "t": 1712255476.3190844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.38544303752961, "units": "Tflops", "t": 1712255476.554632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.0303391627988, "units": "Tflops", "t": 1712255476.7911675}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255476.5860624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.01050288854016, "units": "Tflops", "t": 1712255477.030295}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.45079483481992, "units": "Tflops", "t": 1712255477.2682111}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.0919209}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.7488474673816, "units": "Tflops", "t": 1712255477.5054321}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.69089485624697, "units": "Tflops", "t": 1712255477.7404397}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255477.5977395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.02142693660501, "units": "Tflops", "t": 1712255477.9797747}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.89490385802321, "units": "Tflops", "t": 1712255478.219234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.1033907}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.84024569366447, "units": "Tflops", "t": 1712255478.4564245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85146113950891, "units": "Tflops", "t": 1712255478.6933143}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255478.6090999}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.20126552308362, "units": "Tflops", "t": 1712255478.9320936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.17297837579811, "units": "Tflops", "t": 1712255479.1708422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.114808}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.9674192411993, "units": "Tflops", "t": 1712255479.4076736}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.79027644667492, "units": "Tflops", "t": 1712255479.644727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255479.6212878}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.48667141486742, "units": "Tflops", "t": 1712255479.882796}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.36931852814708, "units": "Tflops", "t": 1712255480.1210608}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.60116177848478, "units": "Tflops", "t": 1712255480.35859}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.1269372}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.23768658907109, "units": "Tflops", "t": 1712255480.5973053}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.11397220468167, "units": "Tflops", "t": 1712255480.8361886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255480.6322792}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.36746846288202, "units": "Tflops", "t": 1712255481.0743773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.58526662592641, "units": "Tflops", "t": 1712255481.312054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.1378949}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.97591188752702, "units": "Tflops", "t": 1712255481.5515196}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.68714588622315, "units": "Tflops", "t": 1712255481.788829}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255481.6435814}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.61817842719907, "units": "Tflops", "t": 1712255482.0264785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.00609728386748, "units": "Tflops", "t": 1712255482.2655454}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.1492035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.44245543291528, "units": "Tflops", "t": 1712255482.5037355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.62375901777159, "units": "Tflops", "t": 1712255482.7412024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255482.6548126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.52415368697116, "units": "Tflops", "t": 1712255482.9791477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.30627309920374, "units": "Tflops", "t": 1712255483.2174418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.160345}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42476235233003, "units": "Tflops", "t": 1712255483.4554894}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.44894150436994, "units": "Tflops", "t": 1712255483.6934173}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255483.6661813}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.44643962612749, "units": "Tflops", "t": 1712255483.9314725}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.0283131735444, "units": "Tflops", "t": 1712255484.1705503}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42031698854764, "units": "Tflops", "t": 1712255484.4085517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.1717713}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.52118368626097, "units": "Tflops", "t": 1712255484.6463883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.60199851664846, "units": "Tflops", "t": 1712255484.8839822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255484.6774912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.26582887699695, "units": "Tflops", "t": 1712255485.122456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.03510860417994, "units": "Tflops", "t": 1712255485.3614542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.1830454}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.58829763511311, "units": "Tflops", "t": 1712255485.601731}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.64739096922534, "units": "Tflops", "t": 1712255485.839205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255485.6886952}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.98361691415816, "units": "Tflops", "t": 1712255486.078608}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.4473662319097, "units": "Tflops", "t": 1712255486.316537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.1940565}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.98912129502428, "units": "Tflops", "t": 1712255486.5558977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.23076898394027, "units": "Tflops", "t": 1712255486.7945006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255486.6999109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.23962370446485, "units": "Tflops", "t": 1712255487.0331292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.3931910162418, "units": "Tflops", "t": 1712255487.2713127}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.2059104}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.13053417027704, "units": "Tflops", "t": 1712255487.5103962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.91459290199205, "units": "Tflops", "t": 1712255487.7497797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255487.7117453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42031698854764, "units": "Tflops", "t": 1712255487.9878573}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.33177671541954, "units": "Tflops", "t": 1712255488.226122}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.21736}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.59148090444579, "units": "Tflops", "t": 1712255488.4663923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.2574305281935, "units": "Tflops", "t": 1712255488.704815}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.4161498483484, "units": "Tflops", "t": 1712255488.9428568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255488.7229724}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42318790375046, "units": "Tflops", "t": 1712255489.1809745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.84795894099558, "units": "Tflops", "t": 1712255489.4204614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.2285285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.0632192864293, "units": "Tflops", "t": 1712255489.6594381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.26389066079385, "units": "Tflops", "t": 1712255489.8978834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255489.7343385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.12556495529024, "units": "Tflops", "t": 1712255490.136759}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.56193048458238, "units": "Tflops", "t": 1712255490.3770025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.2398126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.5783854423225, "units": "Tflops", "t": 1712255490.6173174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.15041639212926, "units": "Tflops", "t": 1712255490.8560798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255490.7452235}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.94391315428545, "units": "Tflops", "t": 1712255491.0953827}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.77210667198766, "units": "Tflops", "t": 1712255491.335063}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255491.2509296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.55529562794469, "units": "Tflops", "t": 1712255491.5754318}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712255492.0723476, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D0.data new file mode 100644 index 000000000..973c56c7d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D0.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255763.64029, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255766.011682}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10849.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24393.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255771.5214307, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D1.data new file mode 100644 index 000000000..4937c37d4 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp16.D1.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255765.997041, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255766.0182161}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5243.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24393.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255771.6062105, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D0.data new file mode 100644 index 000000000..2db857c87 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D0.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255753.452555, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255755.788694}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24429.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24427.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255761.3059878, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D1.data new file mode 100644 index 000000000..56a006dda --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-fp32.D1.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255755.774073, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255755.7957416}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255761.234581, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D0.data new file mode 100644 index 000000000..e7fa603e3 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D0.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255784.292977, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255786.6583555}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24469.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24393.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255792.0367098, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D1.data new file mode 100644 index 000000000..8ecf45fc9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32-fp16.D1.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255786.642294, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255786.6658125}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24469.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24393.8125, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 118.19 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255792.1112833, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D0.data new file mode 100644 index 000000000..bc84db6eb --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D0.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255773.961569, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255776.3774183}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24429.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24427.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255781.8679147, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D1.data new file mode 100644 index 000000000..57632998b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/convnext_large-tf32.D1.data @@ -0,0 +1,70 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255776.361131, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255776.3849685}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 84.19 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712255781.9482388, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0.data new file mode 100644 index 000000000..84c62211d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0.data @@ -0,0 +1,228 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255917.190164, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712255917.207126}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.01) calculated from base learning rate (0.01) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7083.75, 24512.0], "load": 0.07, "temperature": null, "power": null}, "1": {"memory": [7083.75, 24512.0], "load": 0.07, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24365.75, 24512.0], "load": 0.4, "temperature": null, "power": null}, "1": {"memory": [24365.75, 24512.0], "load": 0.39, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 146.25 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-04 18:38:48,214] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 34749) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"} +{"event": "line", "data": " run(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"} +{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"} +{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Failures:\n", "pipe": "stderr"} +{"event": "line", "data": "[1]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-04_18:38:48\n", "pipe": "stderr"} +{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 34750)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"} +{"event": "line", "data": "[0]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-04_18:38:48\n", "pipe": "stderr"} +{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 34749)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712255928.528487, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0.data new file mode 100644 index 000000000..87b4b7e48 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0.data @@ -0,0 +1,92 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255904.482029, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712255906.7814283}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6043.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24437.8125, 24512.0], "load": 0.36, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712255914.8352365, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1.data new file mode 100644 index 000000000..b157704c7 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1.data @@ -0,0 +1,92 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255906.765655, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712255906.7886434}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [6043.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24437.8125, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 74.19 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712255914.2563057, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/dlrm.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/dlrm.0.data new file mode 100644 index 000000000..a422f184b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/dlrm.0.data @@ -0,0 +1,266 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256080.036951, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712256080.0531104}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"} +{"event": "line", "data": "Using 2 GPU(s)...\n", "pipe": "stdout"} +{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2661.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2661.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0887361615896225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2695.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2659.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4973.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}, "1": {"memory": [4955.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08788755536079407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4981.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [4963.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08937834203243256}, "pipe": "data"} +{"event": "data", "data": {"rate": 371425.095795438, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4981.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08813147246837616}, "pipe": "data"} +{"event": "data", "data": {"rate": 377784.3698403836, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5183.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08771650493144989}, "pipe": "data"} +{"event": "data", "data": {"rate": 376599.89734067005, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5183.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08742949366569519}, "pipe": "data"} +{"event": "data", "data": {"rate": 380970.32751775376, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08757737278938293}, "pipe": "data"} +{"event": "data", "data": {"rate": 377836.4252393994, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5367.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08654382824897766}, "pipe": "data"} +{"event": "data", "data": {"rate": 375045.5186776056, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0858154445886612}, "pipe": "data"} +{"event": "data", "data": {"rate": 370671.1267259742, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08628799766302109}, "pipe": "data"} +{"event": "data", "data": {"rate": 378051.5421037138, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08730762451887131}, "pipe": "data"} +{"event": "data", "data": {"rate": 382371.943906708, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08590050786733627}, "pipe": "data"} +{"event": "data", "data": {"rate": 372934.675634125, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08602667599916458}, "pipe": "data"} +{"event": "data", "data": {"rate": 380600.03742350225, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08581672608852386}, "pipe": "data"} +{"event": "data", "data": {"rate": 371078.53237081517, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08588778972625732}, "pipe": "data"} +{"event": "data", "data": {"rate": 376734.87398213864, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08632193505764008}, "pipe": "data"} +{"event": "data", "data": {"rate": 384627.1947390212, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08664406836032867}, "pipe": "data"} +{"event": "data", "data": {"rate": 380136.2741211303, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0852133184671402}, "pipe": "data"} +{"event": "data", "data": {"rate": 373562.4662382433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5385.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08576950430870056}, "pipe": "data"} +{"event": "data", "data": {"rate": 381554.8237970788, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08471724390983582}, "pipe": "data"} +{"event": "data", "data": {"rate": 379318.91418293887, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08532554656267166}, "pipe": "data"} +{"event": "data", "data": {"rate": 380023.48472767865, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08427020162343979}, "pipe": "data"} +{"event": "data", "data": {"rate": 381719.00527997245, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08591149747371674}, "pipe": "data"} +{"event": "data", "data": {"rate": 381323.215296684, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0845126137137413}, "pipe": "data"} +{"event": "data", "data": {"rate": 375919.06176255655, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0840827077627182}, "pipe": "data"} +{"event": "data", "data": {"rate": 383527.7185739894, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08428709208965302}, "pipe": "data"} +{"event": "data", "data": {"rate": 375731.72974651406, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08434845507144928}, "pipe": "data"} +{"event": "data", "data": {"rate": 386745.5702536759, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5587.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08465415984392166}, "pipe": "data"} +{"event": "data", "data": {"rate": 382363.8055317849, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08376755565404892}, "pipe": "data"} +{"event": "data", "data": {"rate": 375980.672977867, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.083040751516819}, "pipe": "data"} +{"event": "data", "data": {"rate": 387707.165043277, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08479100465774536}, "pipe": "data"} +{"event": "data", "data": {"rate": 384212.8839543423, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08379324525594711}, "pipe": "data"} +{"event": "data", "data": {"rate": 379524.0211904309, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08349813520908356}, "pipe": "data"} +{"event": "data", "data": {"rate": 382142.0878639916, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08437865972518921}, "pipe": "data"} +{"event": "data", "data": {"rate": 376865.15358129086, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08468881249427795}, "pipe": "data"} +{"event": "data", "data": {"rate": 380369.1977097986, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08403709530830383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5791.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 377831.3628211679, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08420669287443161}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 379886.4500295244, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08320620656013489}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 371345.4072569379, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08308559656143188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 376269.65747151565, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08385886251926422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 375092.7086134967, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402976393699646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5569.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 379473.8025035265, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08415201306343079}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 369139.1555031587, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402085304260254}, "pipe": "data"} +{"event": "data", "data": {"rate": 377018.469187657, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08228246122598648}, "pipe": "data"} +{"event": "data", "data": {"rate": 380442.850431833, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08508102595806122}, "pipe": "data"} +{"event": "data", "data": {"rate": 380623.6452450096, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08269783854484558}, "pipe": "data"} +{"event": "data", "data": {"rate": 381914.9561826947, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08433524519205093}, "pipe": "data"} +{"event": "data", "data": {"rate": 374173.10347631114, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08258543908596039}, "pipe": "data"} +{"event": "data", "data": {"rate": 382431.13601934566, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08313082903623581}, "pipe": "data"} +{"event": "data", "data": {"rate": 381339.51930086757, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08317264169454575}, "pipe": "data"} +{"event": "data", "data": {"rate": 376029.14280939166, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08357995748519897}, "pipe": "data"} +{"event": "data", "data": {"rate": 383762.0944846918, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08405735343694687}, "pipe": "data"} +{"event": "data", "data": {"rate": 383303.0711690678, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08272015303373337}, "pipe": "data"} +{"event": "data", "data": {"rate": 375138.7164930135, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08338481932878494}, "pipe": "data"} +{"event": "data", "data": {"rate": 386663.1241426989, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08422383666038513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 382722.0521975041, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08405455201864243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 383164.9160133561, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08417406678199768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 384577.09699264885, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08274278044700623}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 381269.55594709533, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08461865782737732}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 380308.2144601289, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08379694819450378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 383034.0688745473, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08401922881603241}, "pipe": "data"} +{"event": "data", "data": {"rate": 384572.2495162216, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402209728956223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 381272.69682574656, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08343112468719482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 377910.14628079697, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5995.8125, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5773.8125, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712256281.3275568, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0.data new file mode 100644 index 000000000..d9c93e03f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0.data @@ -0,0 +1,243 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255930.817689, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712255933.1109104}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004446029663086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5645.8125, 24512.0], "load": 0.36, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.246s, 7.42/s (17.246s, 7.42/s) LR: 1.000e-05 Data: 0.572 (0.572)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23613.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11819.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [9069.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8879.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8291.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006705284118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0131425857543945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24451.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935508728027344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996306419372559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24451.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 156.1831624908965, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995440483093262}, "pipe": "data"} +{"event": "data", "data": {"rate": 168.9586201588784, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98853874206543}, "pipe": "data"} +{"event": "data", "data": {"rate": 159.01465219145024, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.060269355773926}, "pipe": "data"} +{"event": "data", "data": {"rate": 176.27258397976726, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 145.19968058021394, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003750801086426}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.14520218366917, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056220054626465}, "pipe": "data"} +{"event": "data", "data": {"rate": 151.83865069816747, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990062713623047}, "pipe": "data"} +{"event": "data", "data": {"rate": 175.40664860799035, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 145.45153278967254, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029505729675293}, "pipe": "data"} +{"event": "data", "data": {"rate": 175.45757433453755, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9779558181762695}, "pipe": "data"} +{"event": "data", "data": {"rate": 145.02238583108993, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975492477416992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 175.56470216716664, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 144.91141828218414, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979888916015625}, "pipe": "data"} +{"event": "data", "data": {"rate": 176.48576057662927, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.069397926330566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24455.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 142.77113886157207, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005284309387207}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.723s, 177.16/s (1.253s, 102.19/s) LR: 1.000e-05 Data: 0.000 (0.026)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 167.20304259742375, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.810 (0.810) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 1.168 (0.296) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0/20240404-183857-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 144.034754776132, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6313.8125, 24512.0], "load": 0.83, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6313.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6313.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4565.8125, 24512.0], "load": 0.82, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020685195922852}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.279s, 100.05/s (1.279s, 100.05/s) LR: 1.001e-02 Data: 0.458 (0.458)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 155.5760635598423, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.960961818695068}, "pipe": "data"} +{"event": "data", "data": {"rate": 143.27522267530088, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24357.8125, 24512.0], "load": 0.68, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0037946701049805}, "pipe": "data"} +{"event": "data", "data": {"rate": 142.41033852596325, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 130.77803795489464, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034621238708496}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.05587989425635, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10173.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 111.55162576995241, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963570594787598}, "pipe": "data"} +{"event": "data", "data": {"rate": 144.10494127118514, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019160270690918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24467.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 136.77740915494263, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061408042907715}, "pipe": "data"} +{"event": "data", "data": {"rate": 171.38995765367892, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 139.07770271576695, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034209728240967}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24469.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 176.73007257043497, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.999085903167725}, "pipe": "data"} +{"event": "data", "data": {"rate": 140.73386821241007, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.063625335693359}, "pipe": "data"} +{"event": "data", "data": {"rate": 156.30374000005074, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [22773.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 118.87836763016499, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087857246398926}, "pipe": "data"} +{"event": "data", "data": {"rate": 146.87189445188082, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 148.59329772368477, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993795871734619}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24457.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 175.54020096884477, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007907390594482}, "pipe": "data"} +{"event": "data", "data": {"rate": 132.59430062894253, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.070873260498047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24451.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.07431821359702, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.12219783714386, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.197940826416016}, "pipe": "data"} +{"event": "data", "data": {"rate": 166.13797086400245, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24311.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 116.65194822861136, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.147008895874023}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.51212007607452, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1084442138671875}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.724s, 176.79/s (0.821s, 155.99/s) LR: 1.001e-02 Data: 0.000 (0.026)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 139.39110599911172, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.770 (0.770) Loss: 6.8921 (6.8921) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.067 (0.262) Loss: 6.9395 (6.9699) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0/20240404-183857-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4631.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 176.76094961307055, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6173.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6173.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6173.8125, 24512.0], "load": 0.71, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995844841003418}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.241s, 103.11/s (1.241s, 103.11/s) LR: 2.001e-02 Data: 0.426 (0.426)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 133.72496430340055, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03409481048584}, "pipe": "data"} +{"event": "data", "data": {"rate": 157.239503859884, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24431.8125, 24512.0], "load": 0.8, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0975728034973145}, "pipe": "data"} +{"event": "data", "data": {"rate": 146.63419993022822, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019063949584961}, "pipe": "data"} +{"event": "data", "data": {"rate": 157.34490851642497, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 134.12059622527286, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0097246170043945}, "pipe": "data"} +{"event": "data", "data": {"rate": 174.23972461465974, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088225364685059}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.43146636351116, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.010916709899902}, "pipe": "data"} +{"event": "data", "data": {"rate": 175.27376516031114, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.7812325799918, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.101739406585693}, "pipe": "data"} +{"event": "data", "data": {"rate": 174.27403734754023, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231767654418945}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.11038524135242, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.292592525482178}, "pipe": "data"} +{"event": "data", "data": {"rate": 174.9156869321893, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.36997214432094, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112415313720703}, "pipe": "data"} +{"event": "data", "data": {"rate": 173.80298887533507, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.197756767272949}, "pipe": "data"} +{"event": "data", "data": {"rate": 136.54599227559748, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.235831260681152}, "pipe": "data"} +{"event": "data", "data": {"rate": 174.0547052075538, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 135.98386842916506, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.275215148925781}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.65723573592533, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24433.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.224541664123535}, "pipe": "data"} +{"event": "data", "data": {"rate": 136.7135274497949, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712256046.0211246, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1.data new file mode 100644 index 000000000..5a1319f7f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1.data @@ -0,0 +1,253 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255933.09558, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712255933.1184373}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004453659057617}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5645.8125, 24512.0], "load": 0.35, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.370s, 7.37/s (17.370s, 7.37/s) LR: 1.000e-05 Data: 0.579 (0.579)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23531.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13601.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11127.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10937.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10637.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006714820861816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013139724731445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036353588104248}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [17027.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9829301834106445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996297359466553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24241.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 115.98848552582137, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995445728302002}, "pipe": "data"} +{"event": "data", "data": {"rate": 143.35521037626873, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995490550994873}, "pipe": "data"} +{"event": "data", "data": {"rate": 151.07365948930976, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [18917.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970373153686523}, "pipe": "data"} +{"event": "data", "data": {"rate": 133.24616771555847, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 171.80614457696456, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.009486198425293}, "pipe": "data"} +{"event": "data", "data": {"rate": 109.52578798411592, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24229.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.941388130187988}, "pipe": "data"} +{"event": "data", "data": {"rate": 173.86886058959016, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 115.87421045508268, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961498260498047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24453.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 143.38121255069655, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990070819854736}, "pipe": "data"} +{"event": "data", "data": {"rate": 144.5995550692025, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029526233673096}, "pipe": "data"} +{"event": "data", "data": {"rate": 132.0436047141948, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946541786193848}, "pipe": "data"} +{"event": "data", "data": {"rate": 173.38762901915706, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 108.81581815830793, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03203821182251}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975484371185303}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.29524914479907, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20309.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979880332946777}, "pipe": "data"} +{"event": "data", "data": {"rate": 141.1807501037705, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.980224609375}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.08453500743647, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24231.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 105.37381383095436, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979129314422607}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005289077758789}, "pipe": "data"} +{"event": "data", "data": {"rate": 170.7990981385958, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 1.059s, 120.84/s (1.373s, 93.21/s) LR: 1.000e-05 Data: 0.000 (0.037)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.750 (0.750) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 1.178 (0.293) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1/20240404-183857-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11701.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 120.76732395043811, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11945.8125, 24512.0], "load": 0.86, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11945.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [8217.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020687103271484}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.248s, 102.53/s (1.248s, 102.53/s) LR: 1.001e-02 Data: 0.442 (0.442)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24245.8125, 24512.0], "load": 0.55, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 136.4557667811463, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017704486846924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003789901733398}, "pipe": "data"} +{"event": "data", "data": {"rate": 161.30785485007573, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 116.98455657074206, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0345940589904785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24321.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 141.95076724644764, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0620293617248535}, "pipe": "data"} +{"event": "data", "data": {"rate": 162.53874838047597, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.065659523010254}, "pipe": "data"} +{"event": "data", "data": {"rate": 128.57425847507284, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019184589385986}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.53952896110098, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 108.6217930442419, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061338901519775}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.91723824200844, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077922344207764}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10109.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0374860763549805}, "pipe": "data"} +{"event": "data", "data": {"rate": 132.9027164186082, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.999045372009277}, "pipe": "data"} +{"event": "data", "data": {"rate": 171.79069501659433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24255.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 107.25865138624613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0636396408081055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077389717102051}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.08750011312569, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11651.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036355495452881}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.26715500832663, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9937944412231445}, "pipe": "data"} +{"event": "data", "data": {"rate": 170.3041171311696, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [19013.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 99.81872724174447, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078935623168945}, "pipe": "data"} +{"event": "data", "data": {"rate": 163.18666430419972, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.098411560058594}, "pipe": "data"} +{"event": "data", "data": {"rate": 155.33655016720704, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24243.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043401718139648}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.53100217684747, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.197937488555908}, "pipe": "data"} +{"event": "data", "data": {"rate": 158.869438605572, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 107.38729979115578, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.147041320800781}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24389.8125, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 173.4800351890698, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9818925857543945}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.737s, 173.70/s (0.885s, 144.66/s) LR: 1.001e-02 Data: 0.000 (0.033)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.602 (0.602) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.067 (0.256) Loss: 6.9393 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1/20240404-183857-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 141.3450924514315, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24343.8125, 24512.0], "load": 0.86, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24417.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24417.8125, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24417.8125, 24512.0], "load": 0.65, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995919227600098}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.448s, 88.39/s (1.448s, 88.39/s) LR: 2.001e-02 Data: 0.471 (0.471)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 128.8976399421159, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034114360809326}, "pipe": "data"} +{"event": "data", "data": {"rate": 168.03100513357694, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24279.8125, 24512.0], "load": 0.92, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1502685546875}, "pipe": "data"} +{"event": "data", "data": {"rate": 73.15334612043536, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097559928894043}, "pipe": "data"} +{"event": "data", "data": {"rate": 158.33936876412403, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24441.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019028663635254}, "pipe": "data"} +{"event": "data", "data": {"rate": 144.59166895573824, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.65953494458603, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0096940994262695}, "pipe": "data"} +{"event": "data", "data": {"rate": 162.1534673672443, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24447.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088198661804199}, "pipe": "data"} +{"event": "data", "data": {"rate": 141.75870888896802, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.160571098327637}, "pipe": "data"} +{"event": "data", "data": {"rate": 159.92693657938904, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 104.13960070986302, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.106841087341309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24427.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 175.52545067508473, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141146659851074}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.59470403454478, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052305698394775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 176.52135102210215, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.84214160154892, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.252406120300293}, "pipe": "data"} +{"event": "data", "data": {"rate": 176.64217808990978, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08305025100708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 139.38617068063758, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.190241813659668}, "pipe": "data"} +{"event": "data", "data": {"rate": 176.09690045620206, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.06042983535764, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.183157920837402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24429.8125, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 175.24722380861084, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712256051.804079, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D0.data new file mode 100644 index 000000000..96bd83b08 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D0.data @@ -0,0 +1,220 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255396.49982, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255398.7885}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 92.7523917431756, "units": "Tflops", "t": 1712255400.769592}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2410.3125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255399.9138083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.07, "temperature": null, "power": null}}, "t": 1712255400.419759}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.38028414920187, "units": "Tflops", "t": 1712255401.4619455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.27, "temperature": null, "power": null}}, "t": 1712255400.9253342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.4, "temperature": null, "power": null}}, "t": 1712255401.430599}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.34183232162187, "units": "Tflops", "t": 1712255402.1540887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.6, "temperature": null, "power": null}}, "t": 1712255401.9360483}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.02837774345825, "units": "Tflops", "t": 1712255402.8416667}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.73, "temperature": null, "power": null}}, "t": 1712255402.4414482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.08794347175838, "units": "Tflops", "t": 1712255403.5355804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.93, "temperature": null, "power": null}}, "t": 1712255402.9468606}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.4521775}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.29681409851152, "units": "Tflops", "t": 1712255404.2281904}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.9573402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.4878508372297, "units": "Tflops", "t": 1712255404.9196634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.462785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.2403634007459, "units": "Tflops", "t": 1712255405.6124618}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.9682617}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.4735107}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.00478836738485, "units": "Tflops", "t": 1712255406.3071556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.9788024}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.16211233059299, "units": "Tflops", "t": 1712255407.0006332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.4843073}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.9896882}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.03046701273247, "units": "Tflops", "t": 1712255407.6951482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255407.4949334}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.88428113393039, "units": "Tflops", "t": 1712255408.3906512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.0002701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.85455163008706, "units": "Tflops", "t": 1712255409.0864003}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.5055943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255409.0108666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.89957599090012, "units": "Tflops", "t": 1712255409.7818384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255409.5161426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.81067450149848, "units": "Tflops", "t": 1712255410.4779058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.0214255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.49556624211912, "units": "Tflops", "t": 1712255411.1761541}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.5267718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.0322652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.58646407597354, "units": "Tflops", "t": 1712255411.8738847}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.5376697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.67850882338522, "units": "Tflops", "t": 1712255412.570939}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.0432758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.5487695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.56254365191127, "units": "Tflops", "t": 1712255413.2687232}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.054106}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.47691730850947, "units": "Tflops", "t": 1712255413.9672155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.5594823}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.41131740448215, "units": "Tflops", "t": 1712255414.6660929}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.064878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.5701962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.24031660309983, "units": "Tflops", "t": 1712255415.3664112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.0758157}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.9745359982242, "units": "Tflops", "t": 1712255416.0685322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.5812733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.17116655894259, "units": "Tflops", "t": 1712255416.769187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.0866477}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.5919538}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.23405813186558, "units": "Tflops", "t": 1712255417.4695296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.0973232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.86773867414098, "units": "Tflops", "t": 1712255418.1724594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.602632}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.107981}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.04863966730076, "units": "Tflops", "t": 1712255418.8741972}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.6133192}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.09251806287995, "units": "Tflops", "t": 1712255419.5754375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.1190927}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.95475220111709, "units": "Tflops", "t": 1712255420.2777157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.6245365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.1299493}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.74437980091288, "units": "Tflops", "t": 1712255420.9817104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.6352532}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.75340048819439, "units": "Tflops", "t": 1712255421.6854718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.1406293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.6459143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.8057483682863, "units": "Tflops", "t": 1712255422.3888752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.1523018}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.69914374365054, "units": "Tflops", "t": 1712255423.0930595}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.6577103}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.73355091676696, "units": "Tflops", "t": 1712255423.7971275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.1632264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.6684918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.24995841535832, "units": "Tflops", "t": 1712255424.5047314}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.1737797}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.86779068743131, "units": "Tflops", "t": 1712255425.2152245}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.679123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.184458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.82121679001737, "units": "Tflops", "t": 1712255425.9262137}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.6900592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.73849600228822, "units": "Tflops", "t": 1712255426.637683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.1955636}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.77780031553147, "units": "Tflops", "t": 1712255427.3488507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.7011583}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.207792}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.7771470445031, "units": "Tflops", "t": 1712255428.0600748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.7144244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.66585212562694, "units": "Tflops", "t": 1712255428.772114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.2211819}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.7288618}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.68096782561825, "units": "Tflops", "t": 1712255429.4842148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.2375152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5469300287417, "units": "Tflops", "t": 1712255430.1971722}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.7460918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.47554391588781, "units": "Tflops", "t": 1712255430.91068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.254856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.7634194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.6561397094833, "units": "Tflops", "t": 1712255431.6228282}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.2721915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.44211571139982, "units": "Tflops", "t": 1712255432.3365848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.7809808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.2894936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.93963611914799, "units": "Tflops", "t": 1712255433.0544062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.798075}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.19462984549327, "units": "Tflops", "t": 1712255433.7700806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.306659}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.41596465083921, "units": "Tflops", "t": 1712255434.484032}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.8152964}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.32396}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.24543553886119, "units": "Tflops", "t": 1712255435.199348}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.8323677}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.14538366074498, "units": "Tflops", "t": 1712255435.9154084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.341082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.849765}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.03810870817333, "units": "Tflops", "t": 1712255436.6323333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.358606}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.15360816518283, "units": "Tflops", "t": 1712255437.3484674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.8673189}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.16257086384087, "units": "Tflops", "t": 1712255438.064383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.3759212}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.8843844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.913035786688, "units": "Tflops", "t": 1712255438.782285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.3931031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.31655909424165, "units": "Tflops", "t": 1712255439.4970264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.9014273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.4100592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.28362759715372, "units": "Tflops", "t": 1712255440.2043805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.9188275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.4166239386077, "units": "Tflops", "t": 1712255440.9106872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.427361}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.95405230453973, "units": "Tflops", "t": 1712255441.620634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.9359872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.4452436}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.0262106571359, "units": "Tflops", "t": 1712255442.3299453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.954898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.31835966290977, "units": "Tflops", "t": 1712255443.036986}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.4645197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.9743228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.11261029714248, "units": "Tflops", "t": 1712255443.745626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.4839365}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.08632086428433, "units": "Tflops", "t": 1712255444.4621625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.9936693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.90876448168352, "units": "Tflops", "t": 1712255445.172344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255444.5033863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.0129802}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.88397008176685, "units": "Tflops", "t": 1712255445.8827431}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.5226243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.74866093405363, "units": "Tflops", "t": 1712255446.594287}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.0324113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.5421023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.55736261315175, "units": "Tflops", "t": 1712255447.3072171}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.0520995}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85333064393474, "units": "Tflops", "t": 1712255448.0178094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.561885}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.44881795164859, "units": "Tflops", "t": 1712255448.731512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.0715935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.5813105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.8619624988399, "units": "Tflops", "t": 1712255449.4420943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.0910022}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.25380094588698, "units": "Tflops", "t": 1712255450.1573052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.600829}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.1105406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.42522543565042, "units": "Tflops", "t": 1712255450.8712175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.620416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.23421228434661, "units": "Tflops", "t": 1712255451.5865765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.1301012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.73277726486148, "units": "Tflops", "t": 1712255452.298232}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.6397965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.1494653}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5499326383956, "units": "Tflops", "t": 1712255453.0111861}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.6591728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.56572276085, "units": "Tflops", "t": 1712255453.7239933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.1688535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.6786718}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.61836443605067, "units": "Tflops", "t": 1712255454.436435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.1893733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.61737239747556, "units": "Tflops", "t": 1712255455.1488369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.6990778}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.56432929802386, "units": "Tflops", "t": 1712255455.8616416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.2088258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.7185621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.3846152401066, "units": "Tflops", "t": 1712255456.5758774}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.2282014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5823546553951, "units": "Tflops", "t": 1712255457.2885575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.7379234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.2477999}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.30461030926529, "units": "Tflops", "t": 1712255458.0034006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.757414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.19487559429736, "units": "Tflops", "t": 1712255458.7190769}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.267144}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.25663076518778, "units": "Tflops", "t": 1712255459.4344037}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.7770362}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.2868426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.48599132421646, "units": "Tflops", "t": 1712255460.1478708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.796605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.84131305164908, "units": "Tflops", "t": 1712255460.8663042}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.306355}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.816263}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.00135561139192, "units": "Tflops", "t": 1712255461.5835097}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.3261979}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.21905753839916, "units": "Tflops", "t": 1712255462.2989829}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.8358598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.31622029742022, "units": "Tflops", "t": 1712255463.013707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.345675}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.855472}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.07219504770241, "units": "Tflops", "t": 1712255463.7303734}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255463.3652358}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255464.2598617, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D1.data new file mode 100644 index 000000000..c6897ad33 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp16.D1.data @@ -0,0 +1,220 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.02, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255398.780101, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255398.7889414}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 92.40534790138327, "units": "Tflops", "t": 1712255400.7453833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255399.89603}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}, "t": 1712255400.4021287}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.44302386018786, "units": "Tflops", "t": 1712255401.437205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.21, "temperature": null, "power": null}}, "t": 1712255400.9076393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.41, "temperature": null, "power": null}}, "t": 1712255401.4135976}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.83009610178632, "units": "Tflops", "t": 1712255402.1257973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.61, "temperature": null, "power": null}}, "t": 1712255401.9190102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.44799524026035, "units": "Tflops", "t": 1712255402.8180318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.74, "temperature": null, "power": null}}, "t": 1712255402.4249086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.338744374932, "units": "Tflops", "t": 1712255403.5101178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}, "t": 1712255402.9307432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.4365292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.8885762128826, "units": "Tflops", "t": 1712255404.2055328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255403.9423325}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.85962449378471, "units": "Tflops", "t": 1712255404.9016497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.4480522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.59655307115918, "units": "Tflops", "t": 1712255405.599288}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255404.9536836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.4594023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.69889031492922, "units": "Tflops", "t": 1712255406.2960696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255405.9650977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.67931873114617, "units": "Tflops", "t": 1712255406.992963}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.470816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255406.976712}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.33210663664435, "units": "Tflops", "t": 1712255407.6924787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255407.4822276}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.54079949871489, "units": "Tflops", "t": 1712255408.3905416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255407.9878407}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.71915088784482, "units": "Tflops", "t": 1712255409.0872548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.4933822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255408.9990942}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.26641857440723, "units": "Tflops", "t": 1712255409.7873833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255409.5046654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.61000841319243, "units": "Tflops", "t": 1712255410.4849021}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.0102322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.08595928704538, "units": "Tflops", "t": 1712255411.1861985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255410.5159712}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.0217168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.01502300950892, "units": "Tflops", "t": 1712255411.8882122}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255411.5272825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.10349405985436, "units": "Tflops", "t": 1712255412.5894794}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.0329535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255412.5385776}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.17844241886912, "units": "Tflops", "t": 1712255413.290129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.0442328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.02745070312777, "units": "Tflops", "t": 1712255413.991996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255413.5496633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.87305685136205, "units": "Tflops", "t": 1712255414.694878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.0553644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255414.5610058}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.80791092172524, "units": "Tflops", "t": 1712255415.3983996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.066771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.80638440306514, "units": "Tflops", "t": 1712255416.101779}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255415.572423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.078242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.53551650881191, "units": "Tflops", "t": 1712255416.8073745}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255416.5838513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.99863881086047, "units": "Tflops", "t": 1712255417.5093246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.0895095}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.63357350361801, "units": "Tflops", "t": 1712255418.2141116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255417.5950634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.1007984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.68328178764803, "units": "Tflops", "t": 1712255418.9184465}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255418.606383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.81882061579176, "units": "Tflops", "t": 1712255419.6217406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.111954}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255419.617523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.66111577292718, "units": "Tflops", "t": 1712255420.3263981}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.1230996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.62083790996782, "units": "Tflops", "t": 1712255421.031169}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255420.628865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.67243530235666, "units": "Tflops", "t": 1712255421.735551}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.134295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255421.6398802}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.67278412682683, "units": "Tflops", "t": 1712255422.440094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.1455631}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.36713514444455, "units": "Tflops", "t": 1712255423.1467912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255422.651164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.41425863104064, "units": "Tflops", "t": 1712255423.8531208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.1567166}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255423.6623175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.41198804846171, "units": "Tflops", "t": 1712255424.5595074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.1679072}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.42819990432454, "units": "Tflops", "t": 1712255425.265889}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255424.6735702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.1793377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.96804395842739, "units": "Tflops", "t": 1712255425.9756455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255425.6850648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.15511763709628, "units": "Tflops", "t": 1712255426.683952}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.19075}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85052641552609, "units": "Tflops", "t": 1712255427.394575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255426.6975405}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.2042375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.33718366624466, "units": "Tflops", "t": 1712255428.1016343}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255427.7108784}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.87891926101426, "units": "Tflops", "t": 1712255428.8120377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.2188647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255428.7275236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.83576027392415, "units": "Tflops", "t": 1712255429.5228136}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.236183}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.43566147698782, "units": "Tflops", "t": 1712255430.2366183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255429.7447622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.00679283055533, "units": "Tflops", "t": 1712255430.946046}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.253511}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255430.7620747}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85990566575383, "units": "Tflops", "t": 1712255431.6567812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.270854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.01826744224479, "units": "Tflops", "t": 1712255432.3661268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255431.7795641}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.2881606}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.00517950266314, "units": "Tflops", "t": 1712255433.0833066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255432.7967415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85205314109872, "units": "Tflops", "t": 1712255433.793908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.3053286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.7734764545604, "units": "Tflops", "t": 1712255434.5051248}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255433.81396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.3225067}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.70521911295624, "units": "Tflops", "t": 1712255435.2170057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255434.8310323}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.63414690758026, "units": "Tflops", "t": 1712255435.929278}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.3397274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255435.848439}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.56847884420803, "units": "Tflops", "t": 1712255436.6421092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.357246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.74295062597577, "units": "Tflops", "t": 1712255437.3459597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255436.8658764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.55958442597641, "units": "Tflops", "t": 1712255438.0511887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.3745108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255437.8830457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.78381044891762, "units": "Tflops", "t": 1712255438.754782}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.3917675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.38140901244707, "units": "Tflops", "t": 1712255439.4613802}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255438.900088}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.4087217}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.53912116380273, "units": "Tflops", "t": 1712255440.1669505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255439.917489}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.70872696725468, "units": "Tflops", "t": 1712255440.871061}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.4260275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.67513083169872, "units": "Tflops", "t": 1712255441.5754418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255440.9346428}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.445216}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.72037544409226, "units": "Tflops", "t": 1712255442.2795045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255441.9549391}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.71570935776161, "units": "Tflops", "t": 1712255442.9835644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.464548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255442.9743505}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.61877899755696, "units": "Tflops", "t": 1712255443.6884096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.4840288}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.47993250880008, "units": "Tflops", "t": 1712255444.3943682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255443.993698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.39146320744268, "units": "Tflops", "t": 1712255445.1008766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255444.503415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.0130074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.5479758817099, "units": "Tflops", "t": 1712255445.8062387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255445.52265}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.1619864010478, "units": "Tflops", "t": 1712255446.5144856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.0324392}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.12238731065332, "units": "Tflops", "t": 1712255447.2230337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255446.5421906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.0521271}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.41880012736345, "units": "Tflops", "t": 1712255447.929373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255447.561914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.90895165955888, "units": "Tflops", "t": 1712255448.6395512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.071647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255448.5813386}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.93260453216449, "units": "Tflops", "t": 1712255449.349593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.0910294}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.9579870315059, "units": "Tflops", "t": 1712255450.0593996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255449.600858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.81533217842181, "units": "Tflops", "t": 1712255450.7704504}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.1105695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255450.620444}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.80758057510381, "units": "Tflops", "t": 1712255451.4814398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.130142}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.74909617965268, "units": "Tflops", "t": 1712255452.1928413}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255451.6398253}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.1494927}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.70459792293097, "units": "Tflops", "t": 1712255452.9046192}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255452.6592133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.70105729874315, "units": "Tflops", "t": 1712255453.6163921}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.1689467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.63749633591054, "units": "Tflops", "t": 1712255454.3286526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255453.6787007}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.1894336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.63690706707268, "units": "Tflops", "t": 1712255455.0409474}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255454.6991057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.58213781385389, "units": "Tflops", "t": 1712255455.7536378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.2088523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255455.71859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.50918187577987, "units": "Tflops", "t": 1712255456.467034}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.2282295}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.61402442417489, "units": "Tflops", "t": 1712255457.1794703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255456.737953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.21583049743126, "units": "Tflops", "t": 1712255457.8949811}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.247826}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255457.757441}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.00453706671458, "units": "Tflops", "t": 1712255458.612179}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.2671707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.95396575340398, "units": "Tflops", "t": 1712255459.3297238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255458.7770627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.2868712}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.23934696871284, "units": "Tflops", "t": 1712255460.045095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255459.796631}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.14427898840967, "units": "Tflops", "t": 1712255460.7611742}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.3063846}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.85378250617472, "units": "Tflops", "t": 1712255461.4795084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255460.8163052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.3262272}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.0702649581587, "units": "Tflops", "t": 1712255462.1961985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255461.8359008}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.1872887041371, "units": "Tflops", "t": 1712255462.911933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.3457048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255462.8554997}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.07418649474786, "units": "Tflops", "t": 1712255463.6286023}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3097.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255463.3652637}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712255464.3165908, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D0.data new file mode 100644 index 000000000..0e339db77 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D0.data @@ -0,0 +1,345 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255545.340192, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255547.6680193}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 15.910726942115057, "units": "Tflops", "t": 1712255550.303062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255548.7817533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}, "t": 1712255549.2890816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712255549.79447}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712255550.300143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.856185806535132, "units": "Tflops", "t": 1712255551.6907296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712255550.8055346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712255551.3109431}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.791533519580378, "units": "Tflops", "t": 1712255553.0845685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.9, "temperature": null, "power": null}}, "t": 1712255551.816372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255552.3217378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255552.827104}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.814345117151015, "units": "Tflops", "t": 1712255554.4760323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255553.3326104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255553.838185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712255554.3435953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.760160026503572, "units": "Tflops", "t": 1712255555.8718276}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255554.8491783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.3545823}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.8599298}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.73176006759587, "units": "Tflops", "t": 1712255557.2701175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.3653371}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.8707101}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.706762229774338, "units": "Tflops", "t": 1712255558.6704862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.3763957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.8817732}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.387393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.7089852637417, "units": "Tflops", "t": 1712255560.0706732}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.8928373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.3988273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.9042041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.70406119425526, "units": "Tflops", "t": 1712255561.4713368}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.4097006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.9151978}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.4206011}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.708875568608, "units": "Tflops", "t": 1712255562.8715458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.9261262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.431604}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.695410765420805, "units": "Tflops", "t": 1712255564.2729542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.9371133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.4424784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.9479754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.704676199861359, "units": "Tflops", "t": 1712255565.6734145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.4534698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.958886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.4643083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.63849032320296, "units": "Tflops", "t": 1712255567.079912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.9697223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255566.4753246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255566.9807286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.66426869659584, "units": "Tflops", "t": 1712255568.4841135}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.486238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.9918022}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.65046062362572, "units": "Tflops", "t": 1712255569.8893833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255568.4972694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.0026352}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.5080934}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.728750011050055, "units": "Tflops", "t": 1712255571.2878015}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.0136085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.5189857}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.024495}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.889911177988383, "units": "Tflops", "t": 1712255572.6719346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.5303214}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.0357168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.5411687}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.826081141049558, "units": "Tflops", "t": 1712255574.0617833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.0466516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.5519965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.0574956}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.867670099370265, "units": "Tflops", "t": 1712255575.4478362}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.563711}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.069092}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.809582412552908, "units": "Tflops", "t": 1712255576.8389657}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.574485}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.0798798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.58535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.826445032484301, "units": "Tflops", "t": 1712255578.228757}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.090967}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.5963519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.1018555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.811398244916061, "units": "Tflops", "t": 1712255579.6197503}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.6072128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.1125522}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.6179092}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.791063089377742, "units": "Tflops", "t": 1712255581.0125449}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.123556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.628886}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.829614854308845, "units": "Tflops", "t": 1712255582.4020793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.1342416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.6397645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.1452425}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.774675025991804, "units": "Tflops", "t": 1712255583.796309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.6507223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.1561096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.6615043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.787978951217191, "units": "Tflops", "t": 1712255585.1893754}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.1670303}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.6724446}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.1779106}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.799299666614093, "units": "Tflops", "t": 1712255586.5815606}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.683302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.1888883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.722232171770562, "units": "Tflops", "t": 1712255587.9804113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.6942391}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.1997223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.7052448}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.728216262102675, "units": "Tflops", "t": 1712255589.378773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.2106667}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.7160475}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.2216003}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.764930720094418, "units": "Tflops", "t": 1712255590.7740073}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.7270205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.232445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.7379248}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.768725631060855, "units": "Tflops", "t": 1712255592.1687796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.2433767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.7487893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.697796230731747, "units": "Tflops", "t": 1712255593.5697937}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.254256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.7596364}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.2651298}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.70776532604224, "units": "Tflops", "t": 1712255594.969962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.7705636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.2761729}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.7815537}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.731671520076944, "units": "Tflops", "t": 1712255596.3681211}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.2872436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.7926126}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.2981296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.698122184867804, "units": "Tflops", "t": 1712255597.7691581}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.8037198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.309168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.724963590675703, "units": "Tflops", "t": 1712255599.1677573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.8147457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.3201616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.8257203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.66051325436683, "units": "Tflops", "t": 1712255600.572145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.3311179}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.8365905}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.3431256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.708995965787986, "units": "Tflops", "t": 1712255601.97222}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.8485634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.3542163}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.8597043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.734113650298097, "units": "Tflops", "t": 1712255603.3702004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.3651214}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.870679}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.705756587202982, "units": "Tflops", "t": 1712255604.7705188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.376286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.8819466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.387317}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.698156918483635, "units": "Tflops", "t": 1712255606.1715412}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.892803}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.3985481}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.9039252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.699843019570608, "units": "Tflops", "t": 1712255607.5724313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.4093637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.9148562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.4204767}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.672317575761674, "units": "Tflops", "t": 1712255608.9757614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.9259338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.431323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.9368682}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.681217160293855, "units": "Tflops", "t": 1712255610.3784306}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.4424253}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.9479496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.642370504279942, "units": "Tflops", "t": 1712255611.784417}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.4536211}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.9591603}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.4646447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.655818825629723, "units": "Tflops", "t": 1712255613.189246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.9701402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.4756014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.9811418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.683200959377295, "units": "Tflops", "t": 1712255614.5916069}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255613.486701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255613.9921622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255614.4976285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.657945058422127, "units": "Tflops", "t": 1712255615.9962435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.0029662}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.508457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.65166902010008, "units": "Tflops", "t": 1712255617.4013913}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.0139093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.5194578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.024808}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.60774781288782, "units": "Tflops", "t": 1712255618.8105342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.530576}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.0361245}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.5414977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.64558643961625, "units": "Tflops", "t": 1712255620.2164268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.0468614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.5523326}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.057975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.61102350877372, "units": "Tflops", "t": 1712255621.6252875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.5639396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.0693052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.5747862}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.644455935408924, "units": "Tflops", "t": 1712255623.0311265}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.0802267}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.585605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.596623359621637, "units": "Tflops", "t": 1712255624.4412355}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.0911338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.5965674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.1019561}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.622343160160543, "units": "Tflops", "t": 1712255625.8490694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.6074588}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.1131208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.6184845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.628513603874548, "units": "Tflops", "t": 1712255627.2563362}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.1238346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.6293097}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.134831}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.590818027442614, "units": "Tflops", "t": 1712255628.6671333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.6403165}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.1459706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.6515367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.642882524290647, "units": "Tflops", "t": 1712255630.073115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.1569285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.6623812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.587102985739763, "units": "Tflops", "t": 1712255631.4840946}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.1678379}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.6733983}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.1787448}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.616666740355537, "units": "Tflops", "t": 1712255632.8924332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.6843183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.1897936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.695194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.569839445954809, "units": "Tflops", "t": 1712255634.305002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.2006707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.7061725}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.2118294}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.581378439159183, "units": "Tflops", "t": 1712255635.7165217}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.7171934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.2227345}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.595354886713398, "units": "Tflops", "t": 1712255637.1267536}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.7282622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.233739}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.7392259}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.557212722859132, "units": "Tflops", "t": 1712255638.5404723}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.2448416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.7503226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.2557552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.593364248620146, "units": "Tflops", "t": 1712255639.9510727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.761263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.2666855}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.772051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.590235623738554, "units": "Tflops", "t": 1712255641.3617966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.2774434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.7829378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.2884593}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.563759884065002, "units": "Tflops", "t": 1712255642.7749298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.7938502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.2992673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.586407600234109, "units": "Tflops", "t": 1712255644.1859736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.8048894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.310384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.815686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.561344091010326, "units": "Tflops", "t": 1712255645.599321}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.3212526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.8265646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.3320694}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.549897659154153, "units": "Tflops", "t": 1712255647.0137086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.8372893}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.3428822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.8483977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.577367969870629, "units": "Tflops", "t": 1712255648.4255903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.3539088}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.859561}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.3650525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.545722582078248, "units": "Tflops", "t": 1712255649.840362}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.8704834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.3758738}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.525782375547141, "units": "Tflops", "t": 1712255651.2570674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.8814263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.3868208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.8942258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.552527562096355, "units": "Tflops", "t": 1712255652.6712215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.4004166}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.9060063}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.4114156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.569077269937086, "units": "Tflops", "t": 1712255654.0838726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.917131}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.4225712}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.9280612}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.530807087566235, "units": "Tflops", "t": 1712255655.4999828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.4334183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.9390645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.4446144}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.546304285106634, "units": "Tflops", "t": 1712255656.914702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.9500153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.4555209}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.543417160458414, "units": "Tflops", "t": 1712255658.3296328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.9609184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.4663062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.971797}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.550796917709162, "units": "Tflops", "t": 1712255659.7439384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.477359}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.9828982}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255659.48829}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.545793327418611, "units": "Tflops", "t": 1712255661.158698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255659.9937115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255660.4992616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.0046268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.491811215556384, "units": "Tflops", "t": 1712255662.5783842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.510044}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.015635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.521077}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.539125134956947, "units": "Tflops", "t": 1712255663.993747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.02651}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.531993}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.527653844517225, "units": "Tflops", "t": 1712255665.4101334}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.0374837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.5430436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.0484688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.525889528493186, "units": "Tflops", "t": 1712255666.8268692}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.5540493}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.059478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.5648825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.526762488085254, "units": "Tflops", "t": 1712255668.2433631}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.070284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.5757911}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.081366}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.535416367659906, "units": "Tflops", "t": 1712255669.6590788}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.586842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.0922627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.5983434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.512345106668553, "units": "Tflops", "t": 1712255671.0768833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.1037605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.609235}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.522440468893725, "units": "Tflops", "t": 1712255672.4937396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.1146243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.6200562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.1254392}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.506980359493857, "units": "Tflops", "t": 1712255673.9120388}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.6307805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.1362188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.6417248}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.500850784497917, "units": "Tflops", "t": 1712255675.3309033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255674.1472812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255674.652726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255675.1582773}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255676.042694, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D1.data new file mode 100644 index 000000000..9c68189e0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/fp32.D1.data @@ -0,0 +1,342 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255547.659288, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255547.66849}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 15.927861143345272, "units": "Tflops", "t": 1712255550.325242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2410.4375, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255548.8028693}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.1, "temperature": null, "power": null}}, "t": 1712255549.3089736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712255549.8144863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.43, "temperature": null, "power": null}}, "t": 1712255550.3201358}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.96590161097079, "units": "Tflops", "t": 1712255551.7033868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712255550.825632}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712255551.33129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.920836496091372, "units": "Tflops", "t": 1712255553.0853846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712255551.8366325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255552.3419173}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255552.8474307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.953537928996345, "units": "Tflops", "t": 1712255554.4643826}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255553.35291}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255553.8586426}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255554.3643098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.086912982354253, "units": "Tflops", "t": 1712255555.8316953}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255554.8700383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.375619}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.098542980668697, "units": "Tflops", "t": 1712255557.1980338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255555.8811498}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.3867881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255556.8923848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.065952992138705, "units": "Tflops", "t": 1712255558.5672188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.3979971}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255557.9035702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.4092448}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.05193641729031, "units": "Tflops", "t": 1712255559.93773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255558.9150112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.420736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255559.9263666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.06166965119002, "units": "Tflops", "t": 1712255561.3074536}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.4320366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255560.9377532}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.000539928098103, "units": "Tflops", "t": 1712255562.6822336}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.4433444}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255561.949639}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.4553719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.988740616948034, "units": "Tflops", "t": 1712255564.0582309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255562.9609451}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.4666877}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255563.9725835}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.020016026065175, "units": "Tflops", "t": 1712255565.4313016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.4781733}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255564.9836419}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 16.00336335878148, "units": "Tflops", "t": 1712255566.8057618}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.4890256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255565.9944954}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255566.4998224}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.99346213511091, "units": "Tflops", "t": 1712255568.1809568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.005266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255567.5105267}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255568.0159228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.960699184458605, "units": "Tflops", "t": 1712255569.558959}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255568.521181}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.0265794}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255569.5319989}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.960837282608383, "units": "Tflops", "t": 1712255570.936968}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.0372443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255570.5424907}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.949021969708241, "units": "Tflops", "t": 1712255572.3159585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.0477796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255571.5530505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.0583904}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.938777443046229, "units": "Tflops", "t": 1712255573.6958804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255572.563781}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.069115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255573.574452}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.955246218379944, "units": "Tflops", "t": 1712255575.0743537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.0798788}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255574.5851853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.896782521129694, "units": "Tflops", "t": 1712255576.4578922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.0904922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255575.5957592}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.1011443}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.899621525413934, "units": "Tflops", "t": 1712255577.84118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255576.6064155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.111898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255577.6171765}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.889804416430762, "units": "Tflops", "t": 1712255579.2253482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.1224818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255578.627782}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.133088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.924786582030451, "units": "Tflops", "t": 1712255580.606949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255579.6383667}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.1438332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.881806855633734, "units": "Tflops", "t": 1712255581.9921534}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255580.6491568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.1544294}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255581.659799}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.911273150425377, "units": "Tflops", "t": 1712255583.3747807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.165105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255582.6704822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.1757798}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.903654325186311, "units": "Tflops", "t": 1712255584.7578363}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255583.6810932}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.1863942}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255584.6917443}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.87575453155035, "units": "Tflops", "t": 1712255586.1432197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.1970885}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255585.7025359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.893670646825067, "units": "Tflops", "t": 1712255587.5270286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.2079563}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255586.7132492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.2186575}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.898010074231, "units": "Tflops", "t": 1712255588.910492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255587.7239938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.2294085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255588.7349155}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.889147453807437, "units": "Tflops", "t": 1712255590.2950273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.24036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255589.7459192}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.2512972}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.85125076302189, "units": "Tflops", "t": 1712255591.6826117}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255590.756617}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.261997}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.845698086008236, "units": "Tflops", "t": 1712255593.070604}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255591.767399}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.2727113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255592.7780836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.83433255195741, "units": "Tflops", "t": 1712255594.4596043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.2834687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255593.7888258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.2941942}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.837062273657065, "units": "Tflops", "t": 1712255595.848394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255594.7994752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.304846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255595.8101795}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.810579713622479, "units": "Tflops", "t": 1712255597.239484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.315452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255596.820816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.830242450310054, "units": "Tflops", "t": 1712255598.6288505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.3261344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255597.831435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.3369863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.77574617801189, "units": "Tflops", "t": 1712255600.02301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255598.8425097}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.3479187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255599.853252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.824544291675224, "units": "Tflops", "t": 1712255601.4128883}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.3586054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255600.864059}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.369384}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.780275223819668, "units": "Tflops", "t": 1712255602.8066514}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255601.8748083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.3801239}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.82332262793939, "units": "Tflops", "t": 1712255604.1966085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255602.8854797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.3907616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255603.896141}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.801102309603413, "units": "Tflops", "t": 1712255605.5885296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.4013927}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255604.9066634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.4120035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.780839512366876, "units": "Tflops", "t": 1712255606.982265}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255605.9172597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.422697}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255606.927993}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.81647123407189, "units": "Tflops", "t": 1712255608.3728466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.433484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255607.9389668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.787997868637406, "units": "Tflops", "t": 1712255609.7658947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.4449701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255608.950273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.4555984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.824720769806998, "units": "Tflops", "t": 1712255611.1557574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255609.961074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.466375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255610.9719179}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.84609827149076, "units": "Tflops", "t": 1712255612.5437315}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.4772956}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255611.9825716}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.487816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.827596560411902, "units": "Tflops", "t": 1712255613.933693}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255612.9932127}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255613.4986148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.81486032259912, "units": "Tflops", "t": 1712255615.3245184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255614.003885}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255614.5092156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.0145226}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.825766144066446, "units": "Tflops", "t": 1712255616.714376}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255615.5199094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.0251594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255616.5305326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.841770807445616, "units": "Tflops", "t": 1712255618.102733}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.0358903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255617.5412104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.0464497}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.797297212290763, "units": "Tflops", "t": 1712255619.4950016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255618.551813}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.057385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.811921390430832, "units": "Tflops", "t": 1712255620.8859334}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255619.5635452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.0690506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255620.574435}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.805101530966033, "units": "Tflops", "t": 1712255622.277515}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.0797808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255621.585039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.0903418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.780234726193733, "units": "Tflops", "t": 1712255623.6712992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255622.595719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.1009667}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255623.6062264}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.812458124914539, "units": "Tflops", "t": 1712255625.0622237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.1115322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255624.6170847}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.762621785574742, "units": "Tflops", "t": 1712255626.4575264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.12236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255625.6277254}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.1331651}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.800769358082206, "units": "Tflops", "t": 1712255627.8494701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255626.638466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.1438243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255627.6491601}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.772460334203938, "units": "Tflops", "t": 1712255629.2439322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.1547961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255628.6601436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.1656568}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.7827189652354, "units": "Tflops", "t": 1712255630.6374855}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255629.6711864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.1766105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.762551746790068, "units": "Tflops", "t": 1712255632.0327938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255630.6818712}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.1871288}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255631.6924818}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.744620768225206, "units": "Tflops", "t": 1712255633.4297032}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.1978962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255632.7032506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.20859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.7515015014894, "units": "Tflops", "t": 1712255634.826023}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255633.71408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.2194915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255634.7248755}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.76974744263548, "units": "Tflops", "t": 1712255636.2210338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.2301898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255635.7355587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.76835899744905, "units": "Tflops", "t": 1712255637.6158917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.2409394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255636.74635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.2517416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.7330669302239, "units": "Tflops", "t": 1712255639.013847}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255637.75715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.2628024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255638.7683449}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.790625127832094, "units": "Tflops", "t": 1712255640.406692}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.2736256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255639.7790394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.2844267}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.74997372296358, "units": "Tflops", "t": 1712255641.8031268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255640.7900374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.2954571}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255641.8009105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.77880394348938, "units": "Tflops", "t": 1712255643.197019}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.306196}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255642.8116083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.736752539702902, "units": "Tflops", "t": 1712255644.594863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.316945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255643.8224695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.327852}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.716386533654639, "units": "Tflops", "t": 1712255645.9947503}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255644.8331714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.3386047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255645.8440557}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.71156755506454, "units": "Tflops", "t": 1712255647.3949244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.3494732}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255646.8565965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.3619893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.721468402034684, "units": "Tflops", "t": 1712255648.7942026}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255647.8682337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.3735483}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.711377533712646, "units": "Tflops", "t": 1712255650.194333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255648.8789198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.3842778}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255649.8896177}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.734022392237087, "units": "Tflops", "t": 1712255651.592633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.3962462}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255650.9017773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.4073474}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.703523772273826, "units": "Tflops", "t": 1712255652.993522}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255651.9128258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.4182656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255652.9272428}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.715805422313117, "units": "Tflops", "t": 1712255654.3933566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.4329352}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255653.9384727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.716723972864612, "units": "Tflops", "t": 1712255655.7931292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.443861}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255654.949182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.4546654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.711966347212744, "units": "Tflops", "t": 1712255657.1933308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255655.9600194}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.4652593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255656.9706042}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.713899036188513, "units": "Tflops", "t": 1712255658.5932384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.4759762}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255657.9811897}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.4867256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.726347085312163, "units": "Tflops", "t": 1712255659.9921033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255658.9919832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255659.497483}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.72239833501485, "units": "Tflops", "t": 1712255661.3913586}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255660.002859}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255660.5081415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.0134037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.722961172099689, "units": "Tflops", "t": 1712255662.790462}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255661.5186653}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.0240808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255662.5293708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.698677941166077, "units": "Tflops", "t": 1712255664.1917977}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.0347106}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255663.5401568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.0454066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.699166932373712, "units": "Tflops", "t": 1712255665.5930223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255664.5507834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.0564866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255665.561961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.726698360101759, "units": "Tflops", "t": 1712255666.991662}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.0672402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255666.5725327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.735077292663831, "units": "Tflops", "t": 1712255668.3894024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.0779963}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255667.58336}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.0886757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.704240343090143, "units": "Tflops", "t": 1712255669.7899332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255668.5942047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.0996523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255669.6050286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.698303870097522, "units": "Tflops", "t": 1712255671.1909719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.1102896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255670.615615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.121193}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.719726748644288, "units": "Tflops", "t": 1712255672.5901182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255671.6265645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.1320064}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.703232349864692, "units": "Tflops", "t": 1712255673.9906921}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255672.6373641}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.1428497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255673.6485028}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712255674.5593328, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D0.data new file mode 100644 index 000000000..772729c25 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D0.data @@ -0,0 +1,33 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255328.306572, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255330.5847871}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"} +{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"} +{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 150.19 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255394.1711965, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D1.data new file mode 100644 index 000000000..68b4c69a3 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/llama.D1.data @@ -0,0 +1,34 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255330.576584, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255330.585346}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"} +{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"} +{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.", "pipe": "stderr"} +{"event": "line", "data": "OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 150.19 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712255393.429458, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b-multinode.data new file mode 100644 index 000000000..3b45f0015 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-1_3b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b-multinode.data new file mode 100644 index 000000000..cccd5c098 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/opt-6_7b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D0.data new file mode 100644 index 000000000..cb304d036 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D0.data @@ -0,0 +1,47 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255852.033217, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255854.4108155}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23555.8125, 24512.0], "load": 0.43, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"} +{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"} +{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255858.6136966, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D1.data new file mode 100644 index 000000000..c491ddda2 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/reformer.D1.data @@ -0,0 +1,47 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255854.393929, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255854.418388}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23555.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"} +{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"} +{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 956.19 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712255858.685499, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D0.data new file mode 100644 index 000000000..e8d73d861 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D0.data @@ -0,0 +1,77 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255794.463088, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255796.8223135}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24379.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24449.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255805.1206453, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D1.data new file mode 100644 index 000000000..4db96aece --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/regnet_y_128gf.D1.data @@ -0,0 +1,77 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255796.804558, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255796.8299522}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24379.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24449.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 62.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712255805.409879, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0.data new file mode 100644 index 000000000..db2f3df08 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0.data @@ -0,0 +1,184 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255890.820334, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712255890.8366482}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.2) calculated from base learning rate (0.1) and global batch size (512) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 98.25 MiB is free. Including non-PyTorch memory, this process has 21.49 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 237.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15015.75, 24512.0], "load": 0.25, "temperature": null, "power": null}, "1": {"memory": [14779.75, 24512.0], "load": 0.26, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 98.25 MiB is free. Including non-PyTorch memory, this process has 21.49 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 237.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24455.75, 24512.0], "load": 0.32, "temperature": null, "power": null}, "1": {"memory": [24413.75, 24512.0], "load": 0.32, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 56.25 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.82 GiB is allocated by PyTorch, and 328.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 479, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.conv2(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 56.25 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.82 GiB is allocated by PyTorch, and 328.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-04 18:38:21,826] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 33457) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"} +{"event": "line", "data": " run(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"} +{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"} +{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Failures:\n", "pipe": "stderr"} +{"event": "line", "data": "[1]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-04_18:38:21\n", "pipe": "stderr"} +{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 33458)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"} +{"event": "line", "data": "[0]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-04_18:38:21\n", "pipe": "stderr"} +{"event": "line", "data": " host : decentoriole.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 33457)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712255902.1882682, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0.data new file mode 100644 index 000000000..6debcec37 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0.data @@ -0,0 +1,69 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255878.736862, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712255881.0318313}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24389.8125, 24512.0], "load": 0.32, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 122.19 MiB is free. Including non-PyTorch memory, this process has 21.46 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 248.63 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 122.19 MiB is free. Including non-PyTorch memory, this process has 21.46 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 248.63 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712255888.4096215, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1.data new file mode 100644 index 000000000..86517af01 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1.data @@ -0,0 +1,69 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255881.015572, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712255881.0389137}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24451.8125, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 60.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 359.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 479, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.conv2(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 60.19 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 359.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/tuzazolu.2024-04-04_18:28:45.589863/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712255888.4892821, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D0.data new file mode 100644 index 000000000..e939444f3 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D0.data @@ -0,0 +1,1317 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255678.375334, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255680.699123}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.5, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 572.1126740233907, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 580.348844062986, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 405.8079451227742, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 581.2733954821643, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 575.9091595446283, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 575.7359657588951, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 575.0024466553757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.6046994749636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 576.5207671733423, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 435.56210754897063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 573.2884214406574, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 573.4048014061, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 575.6347367341526, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 565.461491080691, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.2251321424063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.0599739315137, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.2348067977822, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 451.5874267754516, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 576.412743803677, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.3534957865011, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.7193579652935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.0104922620504, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.3174286509386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.9075860864638, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 426.0030863923609, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 572.0116942229658, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.2570534284409, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.260241844676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 565.6000572277281, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 575.9483316751655, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.0673992170645, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 577.5541043660687, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 456.59009531270084, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.1525470527383, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.9250919141896, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.3177318211139, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 573.5795939165216, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 573.0953823927051, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 578.6938484520202, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 429.92751688530814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.540881050484, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.7132570629016, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.3163028924275, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 559.0276765625225, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.8765233730917, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.8678943525947, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 425.6825269757262, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 544.5606316444989, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.6010243107713, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.3507340622675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.5214844927647, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.4233869041755, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.3860957741948, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 579.3909853972872, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 417.7012759244583, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 552.4939159557017, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 550.6372648864318, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.7081264832592, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 555.5399100985388, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 560.3661852150547, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9595947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10369873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8717041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.5735283189201, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255750.8894126, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D1.data new file mode 100644 index 000000000..987a3fb7b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/resnet50.D1.data @@ -0,0 +1,1311 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255680.681726, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255680.706513}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.49, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.1563523729465, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 582.8048916927227, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 433.1616331155187, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.7099898121512, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 552.8042857349994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.87817229171, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 565.5704199506474, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.5055616778927, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.1175752260376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.5084703538287, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 484.1458238009069, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.2715097705955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 554.6307761393889, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.7501865738172, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 553.0826912625816, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.6315674306203, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.6441158537912, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 434.73906882971664, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 543.6334542072432, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 562.5089556479531, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.6088695023132, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.7502916561972, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.5971779422605, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.1773665273919, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 432.6962119370114, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 576.1582136216188, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 572.6980333200548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.1022816346485, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.203147974518, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 579.042804389421, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 578.9910544706008, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 578.5008305995159, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 438.27047199980177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 562.6331424079116, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.5106511403991, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 564.6977257925548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.0334160618031, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.1042450773652, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 576.5106212223228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 444.99324420874984, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.2363230338814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.3939957153904, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 565.1672533156722, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 564.3863450639242, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.5973158437805, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.0529856876307, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.244768110895, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 469.0339847047376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 573.3676513383086, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.9317342927358, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 573.82030626832, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 562.1920114241282, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.7613121216148, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.9910597231104, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 440.7401054764529, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 564.337711196518, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 559.5793015195344, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 547.5178738207987, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 551.8487495016799, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.6589266638944, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9595947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.5315071993815, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5837.8125, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712255751.11649, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D0.data new file mode 100644 index 000000000..f67335a79 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D0.data @@ -0,0 +1,437 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256283.646602, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256285.998636}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "[2024-04-04 18:44:47,872] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"} +{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-04-18-44-49', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"} +{"event": "line", "data": "Building token list...\n", "pipe": "stderr"} +{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"} +{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.key.weight", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"} +{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"} +{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"} +{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"} +{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"} +{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "pipe": "stderr"} +{"event": "line", "data": "Installed CUDA version 11.5 does not match the version torch was compiled with 11.8 but since the APIs are compatible, accepting this combination\n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Loading extension module fused_adam...\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2869.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "ImportError", "message": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/fused_adam.so: cannot open shared object file: No such file or directory"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"} +{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"} +{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"} +{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"} +{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"} +{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"} +{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"} +{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 501, in jit_load\n", "pipe": "stderr"} +{"event": "line", "data": " op_module = load(name=self.name,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1308, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return _jit_compile(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1736, in _jit_compile\n", "pipe": "stderr"} +{"event": "line", "data": " return _import_module_from_library(name, build_directory, is_python_module)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2136, in _import_module_from_library\n", "pipe": "stderr"} +{"event": "line", "data": " module = importlib.util.module_from_spec(spec)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"\", line 571, in module_from_spec\n", "pipe": "stderr"} +{"event": "line", "data": " File \"\", line 1176, in create_module\n", "pipe": "stderr"} +{"event": "line", "data": " File \"\", line 241, in _call_with_frames_removed\n", "pipe": "stderr"} +{"event": "line", "data": "ImportError: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/fused_adam.so: cannot open shared object file: No such file or directory\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256321.2152421, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D1.data new file mode 100644 index 000000000..f3e001267 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/rwkv.D1.data @@ -0,0 +1,478 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256285.982062, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256286.0277565}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "[2024-04-04 18:44:47,845] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"} +{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-04-18-44-49', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"} +{"event": "line", "data": "Building token list...\n", "pipe": "stderr"} +{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"} +{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Creating extension directory /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=wkv_128 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -res-usage --maxrregcount 60 --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DTmax=128 -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_cuda.cu -o wkv_cuda.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : 0 bytes gmem\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Compiling entry function '_Z15kernel_backwardIfEviiiPKT_S2_S2_S2_S2_S2_PS0_S3_S3_S3_' for 'sm_86'\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Function properties for _Z15kernel_backwardIfEviiiPKT_S2_S2_S2_S2_S2_PS0_S3_S3_S3_\n", "pipe": "stdout"} +{"event": "line", "data": " 1024 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Used 48 registers, 448 bytes cmem[0], 16 bytes cmem[2]\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Compiling entry function '_Z14kernel_forwardIfEviiiPKT_S2_S2_S2_PS0_' for 'sm_86'\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Function properties for _Z14kernel_forwardIfEviiiPKT_S2_S2_S2_PS0_\n", "pipe": "stdout"} +{"event": "line", "data": " 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Used 40 registers, 408 bytes cmem[0]\n", "pipe": "stdout"} +{"event": "line", "data": "[2/3] c++ -MMD -MF wkv_op.o.d -DTORCH_EXTENSION_NAME=wkv_128 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_op.cpp -o wkv_op.o \n", "pipe": "stdout"} +{"event": "line", "data": "[3/3] c++ wkv_op.o wkv_cuda.cuda.o -shared -L/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/usr/lib64 -lcudart -o wkv_128.so\n", "pipe": "stdout"} +{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"} +{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"} +{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"} +{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"} +{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"} +{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]\n", "pipe": "stderr"} +{"event": "line", "data": "Installed CUDA version 11.5 does not match the version torch was compiled with 11.8 but since the APIs are compatible, accepting this combination\n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Creating extension directory /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module fused_adam...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "FAILED: multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"} +{"event": "line", "data": " 435 | function(_Functor&& __f)\n", "pipe": "stdout"} +{"event": "line", "data": " | ^ \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"} +{"event": "line", "data": " 530 | operator=(_Functor&& __f)\n", "pipe": "stdout"} +{"event": "line", "data": " | ^ \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"} +{"event": "line", "data": "[2/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DBF16_AVAILABLE -c /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o fused_adam_frontend.o \n", "pipe": "stdout"} +{"event": "line", "data": "ninja: build stopped: subcommand failed.\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2869.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2100, in _run_ninja_build\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3975.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "RuntimeError", "message": "Error building extension 'fused_adam'"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": " subprocess.run(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/subprocess.py\", line 526, in run\n", "pipe": "stderr"} +{"event": "line", "data": " raise CalledProcessError(retcode, process.args,\n", "pipe": "stderr"} +{"event": "line", "data": "subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "The above exception was the direct cause of the following exception:\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"} +{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"} +{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"} +{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"} +{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"} +{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"} +{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"} +{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 501, in jit_load\n", "pipe": "stderr"} +{"event": "line", "data": " op_module = load(name=self.name,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1308, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return _jit_compile(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1710, in _jit_compile\n", "pipe": "stderr"} +{"event": "line", "data": " _write_ninja_file_and_build_library(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1823, in _write_ninja_file_and_build_library\n", "pipe": "stderr"} +{"event": "line", "data": " _run_ninja_build(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2116, in _run_ninja_build\n", "pipe": "stderr"} +{"event": "line", "data": " raise RuntimeError(message) from e\n", "pipe": "stderr"} +{"event": "line", "data": "RuntimeError: Error building extension 'fused_adam'\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712256320.832648, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D0.data new file mode 100644 index 000000000..3f64fecb5 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D0.data @@ -0,0 +1,169 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256054.185817, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256056.526606}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.11575698852539}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [16217.8125, 24512.0], "load": 0.65, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20421.8125, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20161.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24403.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"} +{"event": "line", "data": " main(config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"} +{"event": "line", "data": " solver.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"} +{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256068.2847652, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D1.data new file mode 100644 index 000000000..db92b9066 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/stargan.D1.data @@ -0,0 +1,169 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256056.507142, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256056.535834}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.079126358032227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [16217.8125, 24512.0], "load": 0.65, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20421.8125, 24512.0], "load": 0.92, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20161.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24403.8125, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"} +{"event": "line", "data": " main(config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"} +{"event": "line", "data": " solver.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"} +{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 108.19 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712256068.359305, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D0.data new file mode 100644 index 000000000..6b2a1b003 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D0.data @@ -0,0 +1,57 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256070.689504, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256073.038403}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24457.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"} +{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256077.6808748, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D1.data new file mode 100644 index 000000000..1dfd9d19c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/super-slomo.D1.data @@ -0,0 +1,57 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712256073.022484, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256073.0449874}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24457.8125, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"} +{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 54.19 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712256077.391992, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D0.data new file mode 100644 index 000000000..a66f634cb --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D0.data @@ -0,0 +1,64 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255843.55336, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255845.9189045}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24097.8125, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"} +{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255849.5068624, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D1.data new file mode 100644 index 000000000..c521dc7f4 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/t5.D1.data @@ -0,0 +1,64 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255845.903405, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255845.926259}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24097.8125, 24512.0], "load": 0.02, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"} +{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 414.19 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712255849.632406, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D0.data new file mode 100644 index 000000000..0df40f92d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D0.data @@ -0,0 +1,182 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255494.506738, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255496.8402894}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 44.32395615769511, "units": "Tflops", "t": 1712255498.6289868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2409.75, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255497.979648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.04, "temperature": null, "power": null}}, "t": 1712255498.4867253}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.113888221337106, "units": "Tflops", "t": 1712255499.1171482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712255498.993217}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.937108279575895, "units": "Tflops", "t": 1712255499.5959725}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712255499.4994547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.68278843097592, "units": "Tflops", "t": 1712255500.0882452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712255500.0061934}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.74688227388384, "units": "Tflops", "t": 1712255500.569585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712255500.5126212}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.99754622200159, "units": "Tflops", "t": 1712255501.0584092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.9, "temperature": null, "power": null}}, "t": 1712255501.018457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.407794765119164, "units": "Tflops", "t": 1712255501.542985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255501.524528}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.636620572837444, "units": "Tflops", "t": 1712255502.0357597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.030958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.01992714931719, "units": "Tflops", "t": 1712255502.524601}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.25808796513544, "units": "Tflops", "t": 1712255503.010978}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.5372303}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.565022200404776, "units": "Tflops", "t": 1712255503.4940054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.0434697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.24254794523227, "units": "Tflops", "t": 1712255503.9803195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.5498009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.44430716045276, "units": "Tflops", "t": 1712255504.464331}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.0562923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.35295764222703, "units": "Tflops", "t": 1712255504.9493244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.562552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.330489517371284, "units": "Tflops", "t": 1712255505.4345589}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.0689962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.91867693633559, "units": "Tflops", "t": 1712255505.9245315}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.5752397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86405391084061, "units": "Tflops", "t": 1712255506.4150202}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.0814886}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.80390417439531, "units": "Tflops", "t": 1712255506.9060657}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.5879545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.27003883777447, "units": "Tflops", "t": 1712255507.3921373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.0944297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.03315971237542, "units": "Tflops", "t": 1712255507.880683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.6007733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.249362039925444, "units": "Tflops", "t": 1712255508.367085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.107333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86756763115208, "units": "Tflops", "t": 1712255508.857536}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.6136293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.413256491762844, "units": "Tflops", "t": 1712255509.3530936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.1199245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.629730615567254, "units": "Tflops", "t": 1712255509.846111}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.626175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.57582919323457, "units": "Tflops", "t": 1712255510.339934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.132472}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.29943193500668, "units": "Tflops", "t": 1712255510.8256989}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.6393943}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.07235346507425, "units": "Tflops", "t": 1712255511.3139145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.1457307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.30152336671966, "units": "Tflops", "t": 1712255511.7998052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.652009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.618892418552036, "units": "Tflops", "t": 1712255512.2929325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.1583033}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.14842643719602, "units": "Tflops", "t": 1712255512.780409}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.664624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.42014394596967, "units": "Tflops", "t": 1712255513.275786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.1707602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.30301418593073, "units": "Tflops", "t": 1712255513.7614896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.6771686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.010150601289375, "units": "Tflops", "t": 1712255514.2503874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.18358}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86466495310037, "units": "Tflops", "t": 1712255514.7408116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.6898775}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.06675961840541, "units": "Tflops", "t": 1712255515.2290866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.195948}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.08557279222453, "units": "Tflops", "t": 1712255515.7170584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.7021916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.032016393349885, "units": "Tflops", "t": 1712255516.2057006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.19838508142936, "units": "Tflops", "t": 1712255516.7034588}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.2083428}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.617662119399924, "units": "Tflops", "t": 1712255517.1966574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.7146041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.09009117312506, "units": "Tflops", "t": 1712255517.6846452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.221214}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.01836701084231, "units": "Tflops", "t": 1712255518.1736708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.7274458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.04655405981273, "units": "Tflops", "t": 1712255518.6621296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.233823}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.473583510357905, "units": "Tflops", "t": 1712255519.1568947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.740066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.045212066603355, "units": "Tflops", "t": 1712255519.6453083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.2467453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.56512482058357, "units": "Tflops", "t": 1712255520.1390796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.7528946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.06995278598463, "units": "Tflops", "t": 1712255520.6272743}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.2602038}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.89405827208788, "units": "Tflops", "t": 1712255521.117369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.7664056}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.06413935682868, "units": "Tflops", "t": 1712255521.605628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.2727547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.519973263292826, "units": "Tflops", "t": 1712255522.1000543}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.7790055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.12750727845468, "units": "Tflops", "t": 1712255522.587689}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.2851374}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.28907231439495, "units": "Tflops", "t": 1712255523.0846434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.7918847}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.818839367919864, "units": "Tflops", "t": 1712255523.5755682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.2981963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.36585096145274, "units": "Tflops", "t": 1712255524.071713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.8043463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.16850230977778, "units": "Tflops", "t": 1712255524.5588903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.3109093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.337763102041365, "units": "Tflops", "t": 1712255525.0551436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.817204}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86988131271229, "units": "Tflops", "t": 1712255525.5457551}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.3235364}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.464750159714605, "units": "Tflops", "t": 1712255526.0405846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.8301597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.83888471166438, "units": "Tflops", "t": 1712255526.531331}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.3365083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.69336405367286, "units": "Tflops", "t": 1712255527.0348437}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.842745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.190942954087575, "units": "Tflops", "t": 1712255527.5217907}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.3494315}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.16894352155789, "units": "Tflops", "t": 1712255528.0199347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.8556828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.07821307677197, "units": "Tflops", "t": 1712255528.5080955}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.362025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.64601939820956, "units": "Tflops", "t": 1712255529.0011225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.8683746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.05436560733881, "units": "Tflops", "t": 1712255529.4894922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.3747003}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.669111932001734, "units": "Tflops", "t": 1712255529.9822176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.8810327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.59244515057877, "units": "Tflops", "t": 1712255530.475687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.387586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.90467902682812, "units": "Tflops", "t": 1712255530.97685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.8938532}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.91653320906153, "units": "Tflops", "t": 1712255531.466774}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.4004002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.75349336349786, "units": "Tflops", "t": 1712255531.9696054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.9067729}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.08711160933586, "units": "Tflops", "t": 1712255532.4688323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.4130595}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.610972206472205, "units": "Tflops", "t": 1712255532.9620824}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.919665}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.09779499797335, "units": "Tflops", "t": 1712255533.472614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.4260178}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.15018997197237, "units": "Tflops", "t": 1712255533.9710188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.932297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.62970902029946, "units": "Tflops", "t": 1712255534.4642384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.438724}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.868657300603886, "units": "Tflops", "t": 1712255534.9657967}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.9454157}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.17471867761264, "units": "Tflops", "t": 1712255535.4639325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.4516432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.68978139498773, "units": "Tflops", "t": 1712255535.9564278}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.41669995186631, "units": "Tflops", "t": 1712255536.4517384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.958055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.45832031573522, "units": "Tflops", "t": 1712255536.9469166}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.4645443}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.624418809402805, "units": "Tflops", "t": 1712255537.4399807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.970678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.51293723763094, "units": "Tflops", "t": 1712255537.9458098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.4771962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.843353780268046, "units": "Tflops", "t": 1712255538.4365218}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.9837294}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.19410918894905, "units": "Tflops", "t": 1712255538.9460611}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.490056}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.731806881886584, "units": "Tflops", "t": 1712255539.4379442}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.9964588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.571047127787935, "units": "Tflops", "t": 1712255539.931694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255539.5027804}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.242059717574854, "units": "Tflops", "t": 1712255540.4180152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.0090275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.04321742162876, "units": "Tflops", "t": 1712255540.9174335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.515736}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.64651645777013, "units": "Tflops", "t": 1712255541.4100928}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.0220466}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.7110063924343, "units": "Tflops", "t": 1712255541.913332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.5282109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.584642082157416, "units": "Tflops", "t": 1712255542.4069076}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255542.0344613}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255542.918109, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D1.data new file mode 100644 index 000000000..28301f97e --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/tf32.D1.data @@ -0,0 +1,182 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255496.831005, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255496.8407166}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 44.7788458799578, "units": "Tflops", "t": 1712255498.6116586}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2412.5625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712255497.9763858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.11, "temperature": null, "power": null}}, "t": 1712255498.483068}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.95895885313246, "units": "Tflops", "t": 1712255499.1125872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712255498.9896023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.83969049569168, "units": "Tflops", "t": 1712255499.59244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712255499.495858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.45832031573522, "units": "Tflops", "t": 1712255500.0872195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712255500.0023372}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.36973408817683, "units": "Tflops", "t": 1712255500.5730667}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712255500.5084572}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.85624276570781, "units": "Tflops", "t": 1712255501.0634317}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.9, "temperature": null, "power": null}}, "t": 1712255501.0147457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.62530402262176, "units": "Tflops", "t": 1712255501.5563128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255501.5209641}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.39257623782317, "units": "Tflops", "t": 1712255502.0408685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.0272985}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.17425419621564, "units": "Tflops", "t": 1712255502.52782}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.77988941592725, "units": "Tflops", "t": 1712255503.0194016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255502.5336595}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.28235172434591, "units": "Tflops", "t": 1712255503.5053566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.0398607}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.61397162899378, "units": "Tflops", "t": 1712255503.9983792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255503.5462012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.14422779640043, "units": "Tflops", "t": 1712255504.4857323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.0526247}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.84612286969951, "units": "Tflops", "t": 1712255504.976292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255504.5589423}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.395056094837905, "units": "Tflops", "t": 1712255505.4608185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.0652406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.821953935089155, "units": "Tflops", "t": 1712255505.95168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255505.5715632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.25426857104126, "units": "Tflops", "t": 1712255506.4377108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.0778887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.614813266045914, "units": "Tflops", "t": 1712255506.9307034}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255506.5843349}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.27059432918133, "units": "Tflops", "t": 1712255507.4166696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.0908298}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.178989540404615, "units": "Tflops", "t": 1712255507.903519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255507.5970776}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.893533834062595, "units": "Tflops", "t": 1712255508.3936152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.1036634}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.943760616541226, "units": "Tflops", "t": 1712255508.883008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255508.6099136}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.371006213136134, "units": "Tflops", "t": 1712255509.3677788}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.1162822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.851095980508966, "units": "Tflops", "t": 1712255509.869351}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255509.622495}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.38022567106827, "units": "Tflops", "t": 1712255510.3541443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.1286962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.92675056188943, "units": "Tflops", "t": 1712255510.8437297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255510.6357908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.68997627672255, "units": "Tflops", "t": 1712255511.3360486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.142092}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.72117926129767, "units": "Tflops", "t": 1712255511.827869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255511.6484013}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.222140464981514, "units": "Tflops", "t": 1712255512.325248}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.154714}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.50081315456582, "units": "Tflops", "t": 1712255512.8196175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255512.6609316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.155368334437334, "units": "Tflops", "t": 1712255513.317753}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.1671534}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.39386759221849, "units": "Tflops", "t": 1712255513.8132057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255513.6736007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.129869928928215, "units": "Tflops", "t": 1712255514.3007226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.1799622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.83853594378456, "units": "Tflops", "t": 1712255514.7912612}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255514.6861465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.89980604170736, "units": "Tflops", "t": 1712255515.2811234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.1923363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86259184156734, "units": "Tflops", "t": 1712255515.7715054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255515.698581}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.46614353772299, "units": "Tflops", "t": 1712255516.2661517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.2046137}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.42164150130122, "units": "Tflops", "t": 1712255516.761284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255516.711006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.01116101396581, "units": "Tflops", "t": 1712255517.2499323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.217488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.77719371413746, "units": "Tflops", "t": 1712255517.7411423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255517.7238474}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.41163119298791, "units": "Tflops", "t": 1712255518.236544}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.2301853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.06380909337978, "units": "Tflops", "t": 1712255518.7246342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.97790713095039, "units": "Tflops", "t": 1712255519.2135952}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255518.736472}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.40323484066572, "units": "Tflops", "t": 1712255519.6980278}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.2429507}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.11737497642122, "units": "Tflops", "t": 1712255520.1856508}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255519.7492359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.266417367973084, "units": "Tflops", "t": 1712255520.6715488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.2565103}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.25171526596159, "units": "Tflops", "t": 1712255521.1576216}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255520.7628348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.00887666735363, "units": "Tflops", "t": 1712255521.6463003}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.2690878}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.609828647973714, "units": "Tflops", "t": 1712255522.1393409}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255521.775312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.286798455385124, "units": "Tflops", "t": 1712255522.6250246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.2815325}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.85506478435751, "units": "Tflops", "t": 1712255523.11552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255522.788174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.02370708803006, "units": "Tflops", "t": 1712255523.6040359}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.2944717}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.623296150469585, "units": "Tflops", "t": 1712255524.0969372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255523.8007686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.75496655209367, "units": "Tflops", "t": 1712255524.588385}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.3072574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.62869406628652, "units": "Tflops", "t": 1712255525.081225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255524.8135872}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.252114894693726, "units": "Tflops", "t": 1712255525.5672698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.319915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.85421405852636, "units": "Tflops", "t": 1712255526.0576234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255525.8265164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.01922397492914, "units": "Tflops", "t": 1712255526.5461853}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.332841}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86999045452822, "units": "Tflops", "t": 1712255527.0363715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255526.839161}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.27126093686466, "units": "Tflops", "t": 1712255527.5222168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.3457804}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.41069028490827, "units": "Tflops", "t": 1712255528.017479}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255527.8520536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.296628829430546, "units": "Tflops", "t": 1712255528.5031672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.3583648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.89211354260454, "units": "Tflops", "t": 1712255528.9931178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255528.8647535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.440142856779296, "units": "Tflops", "t": 1712255529.4771535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.371083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.694199131712345, "units": "Tflops", "t": 1712255529.969269}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255529.8774142}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.59266074372305, "units": "Tflops", "t": 1712255530.4625041}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.3838887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.54872286506226, "units": "Tflops", "t": 1712255530.9562283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255530.890147}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.84839072519775, "units": "Tflops", "t": 1712255531.446665}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.396813}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.93454250196225, "units": "Tflops", "t": 1712255531.9363031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255531.9031782}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.05168076998865, "units": "Tflops", "t": 1712255532.424522}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.4094706}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.2418599911157, "units": "Tflops", "t": 1712255532.9106786}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.613043706998376, "units": "Tflops", "t": 1712255533.4036427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255532.9159985}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.137136077151276, "units": "Tflops", "t": 1712255533.8909335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.4224138}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.946980184043596, "units": "Tflops", "t": 1712255534.380283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255533.9286559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.7134394599446, "units": "Tflops", "t": 1712255534.8721898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.4351397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.97538491512793, "units": "Tflops", "t": 1712255535.3613765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255534.9416437}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.087137592370524, "units": "Tflops", "t": 1712255535.8492131}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.4480195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.939599954076904, "units": "Tflops", "t": 1712255536.338647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255535.9544706}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86370475130797, "units": "Tflops", "t": 1712255536.828903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.4607756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.77675895392144, "units": "Tflops", "t": 1712255537.3201087}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255536.9670353}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.10996075036498, "units": "Tflops", "t": 1712255537.8076937}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.473502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.57800515724327, "units": "Tflops", "t": 1712255538.3010864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255537.9800189}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.842525300290234, "units": "Tflops", "t": 1712255538.7915692}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.4864187}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.46104205602345, "units": "Tflops", "t": 1712255539.2862768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255538.9928007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.08696409638407, "units": "Tflops", "t": 1712255539.7851734}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255539.4991784}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.93104017205305, "units": "Tflops", "t": 1712255540.2748642}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.0054255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.872413539580684, "units": "Tflops", "t": 1712255540.7650237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255540.5118806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.81633496006767, "units": "Tflops", "t": 1712255541.2558048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.0182118}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.61518014341669, "units": "Tflops", "t": 1712255541.7487934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255541.5245922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.05916390045652, "units": "Tflops", "t": 1712255542.236922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3475.8125, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712255542.0308287}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712255543.0094414, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D0.data new file mode 100644 index 000000000..7395c700d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D0.data @@ -0,0 +1,59 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255861.040341, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255863.3807697}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23123.8125, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255876.4242365, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D1.data new file mode 100644 index 000000000..a8165dbcf --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/tuzazolu.2024-04-04_18:28:45.589863/whisper.D1.data @@ -0,0 +1,59 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "decentoriole.eastus2.cloudapp.azure.com", "ip": "decentoriole.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "127.0.0.1", "fe80::20d:3aff:fee5:4875%eth0", "00:0d:3a:e5:48:75", "10.0.1.4", "00:00:00:00:00:00"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-50899b94affdf1b88f99eb0f84c745cf/id_rsa.covalent.decentoriole.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "tuzazolu.2024-04-04_18:28:45.589863", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "019421f815bb7f0a373a2409d1d1f1d1", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "decentoriole", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-76b68080-f2ad-11ee-aef8-8f495b42d5c8": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.8125, "total": 24512.0}, "utilization": {"compute": 0.12, "memory": 0.09831154128590078}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-774f1700-f2ad-11ee-8e49-e4b9b9bf87ea": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2409.75, "total": 24512.0}, "utilization": {"compute": 0.14, "memory": 0.09830899151436032}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712255863.363996, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255863.3879108}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2691.8125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23123.8125, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.36 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712255876.1995459, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D0.data new file mode 100644 index 000000000..a329488fe --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D0.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623418.981402, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623421.377318}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23980.375, 24512.0], "load": 0.19, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623426.0404646, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D1.data new file mode 100644 index 000000000..75e304153 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp16.D1.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623421.360096, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623421.3852859}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23980.375, 24512.0], "load": 0.16, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623425.8826509, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D0.data new file mode 100644 index 000000000..dbf34133c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D0.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623410.199405, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623412.5927112}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23304.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623416.502414, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D1.data new file mode 100644 index 000000000..41842a761 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-fp32.D1.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623412.575147, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623412.6011856}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23304.375, 24512.0], "load": 0.02, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623416.5814161, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D0.data new file mode 100644 index 000000000..abfb3db52 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D0.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623437.305882, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623439.7101252}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23980.375, 24512.0], "load": 0.16, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623443.823012, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D1.data new file mode 100644 index 000000000..cbe1e673f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32-fp16.D1.data @@ -0,0 +1,38 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623439.693056, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623439.718452}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.480685234069824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23980.375, 24512.0], "load": 0.19, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 531.62 MiB is free. Including non-PyTorch memory, this process has 21.08 GiB memory in use. Of the allocated memory 19.58 GiB is allocated by PyTorch, and 1.22 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623444.1434972, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D0.data new file mode 100644 index 000000000..a6ed56507 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D0.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623428.442214, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623430.9410863}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23304.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623434.8314877, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D1.data new file mode 100644 index 000000000..ea0791402 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bert-tf32.D1.data @@ -0,0 +1,56 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623430.922227, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623430.9480348}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23304.375, 24512.0], "load": 0.02, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 1375, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.cls(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 707, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " prediction_scores = self.predictions(sequence_output)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/bert/modeling_bert.py\", line 697, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states = self.decoder(hidden_states)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.86 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.18 GiB is free. Including non-PyTorch memory, this process has 20.42 GiB memory in use. Of the allocated memory 20.07 GiB is allocated by PyTorch, and 81.52 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712623434.917985, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D0.data new file mode 100644 index 000000000..eb377ccba --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D0.data @@ -0,0 +1,138 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623063.236003, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623065.6052935}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 82.18137656811277, "units": "Tflops", "t": 1712623067.2041838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623066.809077}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.78082259896284, "units": "Tflops", "t": 1712623067.439379}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}, "t": 1712623067.3150377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.11527290445359, "units": "Tflops", "t": 1712623067.6731403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.5973314946801, "units": "Tflops", "t": 1712623067.921405}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.25, "temperature": null, "power": null}}, "t": 1712623067.8205304}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.04916179914048, "units": "Tflops", "t": 1712623068.1604064}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.47516402192292, "units": "Tflops", "t": 1712623068.3957233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.38, "temperature": null, "power": null}}, "t": 1712623068.326009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.47980618499372, "units": "Tflops", "t": 1712623068.6310735}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.07812769495402, "units": "Tflops", "t": 1712623068.8780403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.58, "temperature": null, "power": null}}, "t": 1712623068.8315809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.67696615309998, "units": "Tflops", "t": 1712623069.1189268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.21951675826642, "units": "Tflops", "t": 1712623069.3548973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}, "t": 1712623069.3371701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.34832463942529, "units": "Tflops", "t": 1712623069.5931332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.9526522614453, "units": "Tflops", "t": 1712623069.8349648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.2184589207597, "units": "Tflops", "t": 1712623070.076089}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.91, "temperature": null, "power": null}}, "t": 1712623069.8425956}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.99875554688103, "units": "Tflops", "t": 1712623070.3152518}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.29094075181388, "units": "Tflops", "t": 1712623070.5535767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623070.348327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.16643491499788, "units": "Tflops", "t": 1712623070.7948887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.25762877121089, "units": "Tflops", "t": 1712623071.03603}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623070.8537886}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.20983802050046, "units": "Tflops", "t": 1712623071.2751284}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.32281188201391, "units": "Tflops", "t": 1712623071.5133786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623071.3592489}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.00542021194711, "units": "Tflops", "t": 1712623071.7552521}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.02895218636127, "units": "Tflops", "t": 1712623071.996881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623071.8649027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.34249983335145, "units": "Tflops", "t": 1712623072.2351222}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.34157532853149, "units": "Tflops", "t": 1712623072.4733238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.3703656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.2754197367318, "units": "Tflops", "t": 1712623072.7143567}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.01215522116922, "units": "Tflops", "t": 1712623072.956148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.8758824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.64271648990436, "units": "Tflops", "t": 1712623073.1962337}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.33501187657636, "units": "Tflops", "t": 1712623073.4345567}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.381343}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38421272703722, "units": "Tflops", "t": 1712623073.6753113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.33733575544383, "units": "Tflops", "t": 1712623073.9161294}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.8868787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.35760097519562, "units": "Tflops", "t": 1712623074.1570761}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.16551788531262, "units": "Tflops", "t": 1712623074.395735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.392352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37706042704421, "units": "Tflops", "t": 1712623074.6364944}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36194469229389, "units": "Tflops", "t": 1712623074.8773472}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38004796050076, "units": "Tflops", "t": 1712623075.118053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.8978448}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.00618906299496, "units": "Tflops", "t": 1712623075.3571844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.48296546967502, "units": "Tflops", "t": 1712623075.6002727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.4033408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36656034431981, "units": "Tflops", "t": 1712623075.8412232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37832784162232, "units": "Tflops", "t": 1712623076.0819278}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.9087265}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.9904057453513, "units": "Tflops", "t": 1712623076.3210876}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.02571806411885, "units": "Tflops", "t": 1712623076.5627272}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.414348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.3660173022285, "units": "Tflops", "t": 1712623076.803643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37561199629059, "units": "Tflops", "t": 1712623077.0443556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.9198291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.46196431366901, "units": "Tflops", "t": 1712623077.2848835}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.34511508896233, "units": "Tflops", "t": 1712623077.5256891}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.425265}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.4496312303785, "units": "Tflops", "t": 1712623077.7664015}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37597409967411, "units": "Tflops", "t": 1712623078.0071177}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.9306684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37009027526594, "units": "Tflops", "t": 1712623078.2479057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37262475287666, "units": "Tflops", "t": 1712623078.488631}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.436275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36837053124633, "units": "Tflops", "t": 1712623078.7294173}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36610780879543, "units": "Tflops", "t": 1712623078.9702625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.941826}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37135749649583, "units": "Tflops", "t": 1712623079.2110434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38059116939033, "units": "Tflops", "t": 1712623079.4517431}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.4473877}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38041009904289, "units": "Tflops", "t": 1712623079.6924856}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.9513069406841, "units": "Tflops", "t": 1712623079.9343135}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.35389104390393, "units": "Tflops", "t": 1712623080.1750827}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.9528053}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36122071076, "units": "Tflops", "t": 1712623080.4160469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.26205328719895, "units": "Tflops", "t": 1712623080.6570585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.45829}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.71122319838605, "units": "Tflops", "t": 1712623080.8995764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37199112029228, "units": "Tflops", "t": 1712623081.1402974}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.9637456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37407308892712, "units": "Tflops", "t": 1712623081.3810673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.96879921545683, "units": "Tflops", "t": 1712623081.6228526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.469183}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.72219783049391, "units": "Tflops", "t": 1712623081.865471}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37579304762363, "units": "Tflops", "t": 1712623082.1061785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.974656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.0311982398, "units": "Tflops", "t": 1712623082.3478477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.11969102606311, "units": "Tflops", "t": 1712623082.589238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.4801822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.76496175738967, "units": "Tflops", "t": 1712623082.8316133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37950475806933, "units": "Tflops", "t": 1712623083.072312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.9856522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.99895554081445, "units": "Tflops", "t": 1712623083.3140626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.90478334088733, "units": "Tflops", "t": 1712623083.5561328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.4910765}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.45004547184301, "units": "Tflops", "t": 1712623083.7993891}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37425413416183, "units": "Tflops", "t": 1712623084.0401084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.996587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.05869891771358, "units": "Tflops", "t": 1712623084.2817082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.3255037757806, "units": "Tflops", "t": 1712623084.525223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623084.5020697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.69588101013983, "units": "Tflops", "t": 1712623084.767784}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.50791543699513, "units": "Tflops", "t": 1712623085.0107996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.0075552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36185419397465, "units": "Tflops", "t": 1712623085.2515903}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.48749273377143, "units": "Tflops", "t": 1712623085.4947727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.32594606177697, "units": "Tflops", "t": 1712623085.7382817}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.5131333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.54292184599564, "units": "Tflops", "t": 1712623085.9812646}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36493123741121, "units": "Tflops", "t": 1712623086.2220073}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623086.0186527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.39650186465059, "units": "Tflops", "t": 1712623086.4653745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.56888315833132, "units": "Tflops", "t": 1712623086.708235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623086.52411}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.32152339671589, "units": "Tflops", "t": 1712623086.951797}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.12770343199365, "units": "Tflops", "t": 1712623087.1931581}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.029592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.71220456143875, "units": "Tflops", "t": 1712623087.4356759}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.61488544015931, "units": "Tflops", "t": 1712623087.6785293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.5351698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.75880094995466, "units": "Tflops", "t": 1712623087.923652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38683853583618, "units": "Tflops", "t": 1712623088.1643443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.0407677}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.27970640315782, "units": "Tflops", "t": 1712623088.4080324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.08730035039915, "units": "Tflops", "t": 1712623088.6521826}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.5463588}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623089.4562054, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D1.data new file mode 100644 index 000000000..52268b3b6 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/bf16.D1.data @@ -0,0 +1,138 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623065.596248, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623065.605839}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 81.89998922773792, "units": "Tflops", "t": 1712623067.175949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623066.7665226}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.83491943376553, "units": "Tflops", "t": 1712623067.4109821}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}, "t": 1712623067.2727697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.45944093642903, "units": "Tflops", "t": 1712623067.6463833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.59209737737582, "units": "Tflops", "t": 1712623067.8918984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712623067.778479}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.4082355220014, "units": "Tflops", "t": 1712623068.1274264}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.61843057518533, "units": "Tflops", "t": 1712623068.3623729}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.42, "temperature": null, "power": null}}, "t": 1712623068.2842638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.65445850405426, "units": "Tflops", "t": 1712623068.5972767}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.68277290357786, "units": "Tflops", "t": 1712623068.839835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712623068.7900558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.28906116362654, "units": "Tflops", "t": 1712623069.0813003}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.32440266246397, "units": "Tflops", "t": 1712623069.317004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.75, "temperature": null, "power": null}}, "t": 1712623069.2955978}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.25599255897929, "units": "Tflops", "t": 1712623069.5531356}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37190060206905, "units": "Tflops", "t": 1712623069.7938764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.70057323575507, "units": "Tflops", "t": 1712623070.033749}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712623069.8011928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.62636352352396, "units": "Tflops", "t": 1712623070.271347}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.78607587825098, "units": "Tflops", "t": 1712623070.5084784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623070.3069987}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.48074237803415, "units": "Tflops", "t": 1712623070.7489982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.59548304270619, "units": "Tflops", "t": 1712623070.9892557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623070.8129425}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.3687635007859, "units": "Tflops", "t": 1712623071.227462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.81502084409433, "units": "Tflops", "t": 1712623071.4648328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623071.3187666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.99343352019302, "units": "Tflops", "t": 1712623071.7041874}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.99428718283042, "units": "Tflops", "t": 1712623071.946012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623071.8245184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.866804351966, "units": "Tflops", "t": 1712623072.1855183}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.33501187657636, "units": "Tflops", "t": 1712623072.423902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.3304276}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.76434574008722, "units": "Tflops", "t": 1712623072.6636903}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.42941579777474, "units": "Tflops", "t": 1712623072.9043434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623072.8361802}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.83067553363065, "units": "Tflops", "t": 1712623073.1439433}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.45023882788266, "units": "Tflops", "t": 1712623073.3818696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.3419278}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.8512517562348, "units": "Tflops", "t": 1712623073.6213996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.77112522775298, "units": "Tflops", "t": 1712623073.8637717}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623073.848226}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.68817572299594, "units": "Tflops", "t": 1712623074.103805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.07406394167289, "units": "Tflops", "t": 1712623074.3429682}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.25853791524695, "units": "Tflops", "t": 1712623074.581467}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.3541095}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.75969220740726, "units": "Tflops", "t": 1712623074.8241622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.8577466116662, "units": "Tflops", "t": 1712623075.0637395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623074.8601513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.723827610859, "units": "Tflops", "t": 1712623075.303635}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.68617055797553, "units": "Tflops", "t": 1712623075.5436442}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.3660345}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.71443324750041, "units": "Tflops", "t": 1712623075.7836547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.32612137432967, "units": "Tflops", "t": 1712623076.0245068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623075.8719141}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.45534393630587, "units": "Tflops", "t": 1712623076.2652109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.85353857239662, "units": "Tflops", "t": 1712623076.5046775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.378009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.12365203940374, "units": "Tflops", "t": 1712623076.7462792}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.2095285722104, "units": "Tflops", "t": 1712623076.9874337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623076.8839407}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.29104899660878, "units": "Tflops", "t": 1712623077.2286203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.9753615778755, "units": "Tflops", "t": 1712623077.4677694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.3897924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.2525727068042, "units": "Tflops", "t": 1712623077.709035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.12104133283255, "units": "Tflops", "t": 1712623077.950453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623077.895687}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.6168196188575, "units": "Tflops", "t": 1712623078.196175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.38254865443005, "units": "Tflops", "t": 1712623078.4344354}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.4016285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.28463402855289, "units": "Tflops", "t": 1712623078.6754656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.36185419397465, "units": "Tflops", "t": 1712623078.916415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623078.9076514}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.2271585886938, "units": "Tflops", "t": 1712623079.1604395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.02454856964896, "units": "Tflops", "t": 1712623079.3994665}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.44419122154098, "units": "Tflops", "t": 1712623079.6401672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.4136045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38167760654434, "units": "Tflops", "t": 1712623079.881059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.93767660162145, "units": "Tflops", "t": 1712623080.1231346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623079.919495}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.18968961217794, "units": "Tflops", "t": 1712623080.3644476}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.67441476407215, "units": "Tflops", "t": 1712623080.6044745}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.4254313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.20204285971863, "units": "Tflops", "t": 1712623080.8457232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.54185526113812, "units": "Tflops", "t": 1712623081.0888922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623080.9313893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.38258299057357, "units": "Tflops", "t": 1712623081.3296852}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.871013606828, "units": "Tflops", "t": 1712623081.56925}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.4375348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.04521611735184, "units": "Tflops", "t": 1712623081.8110604}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.0302099626318, "units": "Tflops", "t": 1712623082.052828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623081.9436045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.44718314630637, "units": "Tflops", "t": 1712623082.293524}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.39924525933007, "units": "Tflops", "t": 1712623082.534178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.4494996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.6563768507648, "units": "Tflops", "t": 1712623082.7744324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.09525105488827, "units": "Tflops", "t": 1712623083.0213134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623082.9550524}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.7740242809246, "units": "Tflops", "t": 1712623083.2612116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.35443394185738, "units": "Tflops", "t": 1712623083.5019965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.4610865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.82475592980865, "units": "Tflops", "t": 1712623083.744448}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.44543326208972, "units": "Tflops", "t": 1712623083.987792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623083.9668524}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.66739917643574, "units": "Tflops", "t": 1712623084.228061}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.41056820188973, "units": "Tflops", "t": 1712623084.468703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.02859282809659, "units": "Tflops", "t": 1712623084.710519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623084.4727085}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.98244946537358, "units": "Tflops", "t": 1712623084.9550548}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.76103186549818, "units": "Tflops", "t": 1712623085.1975033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623084.978622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.42488441611836, "units": "Tflops", "t": 1712623085.4383972}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.75674509880973, "units": "Tflops", "t": 1712623085.6809566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.4844997}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.83888922450546, "units": "Tflops", "t": 1712623085.9231915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.04072273779174, "units": "Tflops", "t": 1712623086.1649625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623085.9915082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.06840900689552, "units": "Tflops", "t": 1712623086.4067826}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.07830104153484, "units": "Tflops", "t": 1712623086.648295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623086.4973695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.4584728390808, "units": "Tflops", "t": 1712623086.891674}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.03569067920935, "units": "Tflops", "t": 1712623087.1334126}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.0033453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.10385041569235, "units": "Tflops", "t": 1712623087.3751059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.83763672964976, "units": "Tflops", "t": 1712623087.6172595}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623087.508977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.26313257943669, "units": "Tflops", "t": 1712623087.8639495}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.01655596561953, "units": "Tflops", "t": 1712623088.1057642}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.014946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.85472731732538, "units": "Tflops", "t": 1712623088.347928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37995742631371, "units": "Tflops", "t": 1712623088.5888007}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623088.5208538}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712623089.3806674, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D0.data new file mode 100644 index 000000000..b2919edb5 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D0.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623364.996635, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623367.3813853}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24374.375, 24512.0], "load": 0.07, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623372.918923, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D1.data new file mode 100644 index 000000000..c0d50b8c2 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp16.D1.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623367.362583, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623367.3895981}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10830.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24374.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623372.9969852, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D0.data new file mode 100644 index 000000000..9f2bfa90c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D0.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623354.367836, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623356.7626548}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3826.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24408.375, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623362.3638864, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D1.data new file mode 100644 index 000000000..332ff3dcd --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-fp32.D1.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623356.745656, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623356.771129}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7694.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.07, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623362.598034, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D0.data new file mode 100644 index 000000000..609205a4c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D0.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623386.059841, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623388.4732454}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24374.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623394.4441423, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D1.data new file mode 100644 index 000000000..04fcbc148 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32-fp16.D1.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623388.455584, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623388.482006}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3444.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24374.375, 24512.0], "load": 0.08, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 137.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 21.13 GiB is allocated by PyTorch, and 53.29 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623394.365653, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D0.data new file mode 100644 index 000000000..06c32aaa9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D0.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623375.385025, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623377.7582028}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3826.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24408.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623383.6454434, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D1.data new file mode 100644 index 000000000..41947d735 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/convnext_large-tf32.D1.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623377.740523, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623377.766428}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3826.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 176, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._forward_impl(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 170, in _forward_impl\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/convnext.py\", line 63, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " result = self.layer_scale * self.block(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 294.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 103.62 MiB is free. Including non-PyTorch memory, this process has 21.50 GiB memory in use. Of the allocated memory 21.17 GiB is allocated by PyTorch, and 47.55 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712623383.4317787, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0.data new file mode 100644 index 000000000..4325d5dc6 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0.data @@ -0,0 +1,232 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623520.930649, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712623520.9466906}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.01) calculated from base learning rate (0.01) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7064.3125, 24512.0], "load": 0.06, "temperature": null, "power": null}, "1": {"memory": [7064.3125, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24346.3125, 24512.0], "load": 0.39, "temperature": null, "power": null}, "1": {"memory": [24346.3125, 24512.0], "load": 0.41, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 186, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 41, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.act(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/activation.py\", line 682, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.gelu(input, approximate=self.approximate)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 165.69 MiB is free. Including non-PyTorch memory, this process has 21.44 GiB memory in use. Of the allocated memory 20.89 GiB is allocated by PyTorch, and 162.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-09 00:45:31,977] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 51458) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"} +{"event": "line", "data": " run(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"} +{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"} +{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Failures:\n", "pipe": "stderr"} +{"event": "line", "data": "[1]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-09_00:45:31\n", "pipe": "stderr"} +{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 51459)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"} +{"event": "line", "data": "[0]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-09_00:45:31\n", "pipe": "stderr"} +{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 51458)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712623532.3300037, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0.data new file mode 100644 index 000000000..bea27f08b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0.data @@ -0,0 +1,94 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623508.254633, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712623510.6078093}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6024.375, 24512.0], "load": 0.1, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24418.375, 24512.0], "load": 0.36, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712623518.324035, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1.data new file mode 100644 index 000000000..1806a17e7 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1.data @@ -0,0 +1,94 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623510.591168, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712623510.6157527}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [6024.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24418.375, 24512.0], "load": 0.33, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 575, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 563, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.stages(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 433, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.blocks(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/davit.py\", line 353, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.drop_path2(self.mlp(self.norm2(x)))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/layers/mlp.py\", line 40, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.fc1(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py\", line 114, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.linear(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 148.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 93.62 MiB is free. Including non-PyTorch memory, this process has 21.51 GiB memory in use. Of the allocated memory 21.02 GiB is allocated by PyTorch, and 188.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712623518.567284, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/dlrm.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/dlrm.0.data new file mode 100644 index 000000000..e687b835d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/dlrm.0.data @@ -0,0 +1,271 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623684.095908, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712623684.113308}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"} +{"event": "line", "data": "Using 2 GPU(s)...\n", "pipe": "stdout"} +{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2642.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2642.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2642.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0887361615896225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4954.375, 24512.0], "load": 0.03, "temperature": null, "power": null}, "1": {"memory": [4936.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08788755536079407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4962.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [4944.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08937834203243256}, "pipe": "data"} +{"event": "data", "data": {"rate": 374858.6443835655, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4962.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08813147246837616}, "pipe": "data"} +{"event": "data", "data": {"rate": 370580.520809485, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5164.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08771650493144989}, "pipe": "data"} +{"event": "data", "data": {"rate": 376167.0269763922, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5164.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08742949366569519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 364000.0809580258, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08757737278938293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5348.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 372341.30434767785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08654382824897766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 368779.28609246074, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0858154445886612}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 366422.3478270399, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08628799766302109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 376153.86529779475, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08730762451887131}, "pipe": "data"} +{"event": "data", "data": {"rate": 367143.35022923263, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08590050786733627}, "pipe": "data"} +{"event": "data", "data": {"rate": 364914.34947502473, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08602667599916458}, "pipe": "data"} +{"event": "data", "data": {"rate": 372269.73132390995, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08581672608852386}, "pipe": "data"} +{"event": "data", "data": {"rate": 359851.8650820826, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08588778972625732}, "pipe": "data"} +{"event": "data", "data": {"rate": 375225.8592177494, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08632193505764008}, "pipe": "data"} +{"event": "data", "data": {"rate": 368972.10348847765, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08664406836032867}, "pipe": "data"} +{"event": "data", "data": {"rate": 371890.44802879787, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0852133184671402}, "pipe": "data"} +{"event": "data", "data": {"rate": 379018.6350674685, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5366.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08576950430870056}, "pipe": "data"} +{"event": "data", "data": {"rate": 367169.6462036745, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08471724390983582}, "pipe": "data"} +{"event": "data", "data": {"rate": 370772.46632440516, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08532554656267166}, "pipe": "data"} +{"event": "data", "data": {"rate": 368970.87370857096, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08427020162343979}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 373874.33509459393, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08591149747371674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 379221.4424543034, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0845126137137413}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 365862.1577252217, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0840827077627182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 372515.69896520715, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08428709208965302}, "pipe": "data"} +{"event": "data", "data": {"rate": 378647.5418843599, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08434845507144928}, "pipe": "data"} +{"event": "data", "data": {"rate": 373454.0032267848, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5568.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08465415984392166}, "pipe": "data"} +{"event": "data", "data": {"rate": 376229.0650822549, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08376755565404892}, "pipe": "data"} +{"event": "data", "data": {"rate": 375583.30719257623, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.083040751516819}, "pipe": "data"} +{"event": "data", "data": {"rate": 377734.8891289079, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08479100465774536}, "pipe": "data"} +{"event": "data", "data": {"rate": 364603.27929761703, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08379324525594711}, "pipe": "data"} +{"event": "data", "data": {"rate": 373809.7192626037, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08349813520908356}, "pipe": "data"} +{"event": "data", "data": {"rate": 370005.54059810366, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08437865972518921}, "pipe": "data"} +{"event": "data", "data": {"rate": 377784.6311705755, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08468881249427795}, "pipe": "data"} +{"event": "data", "data": {"rate": 379094.502331452, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5772.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08403709530830383}, "pipe": "data"} +{"event": "data", "data": {"rate": 366551.07764204656, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08420669287443161}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 376381.6127290497, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08320620656013489}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 380564.84333429433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08308559656143188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 373736.84746947035, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08385886251926422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 371668.88585173787, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402976393699646}, "pipe": "data"} +{"event": "data", "data": {"rate": 368483.49676804396, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5550.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08415202051401138}, "pipe": "data"} +{"event": "data", "data": {"rate": 373710.020138691, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402085304260254}, "pipe": "data"} +{"event": "data", "data": {"rate": 379947.3421368536, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08228246867656708}, "pipe": "data"} +{"event": "data", "data": {"rate": 366047.7077859737, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08508101850748062}, "pipe": "data"} +{"event": "data", "data": {"rate": 370532.02079420997, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08269783854484558}, "pipe": "data"} +{"event": "data", "data": {"rate": 379356.10922520014, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08433524519205093}, "pipe": "data"} +{"event": "data", "data": {"rate": 370239.9317008563, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08258543908596039}, "pipe": "data"} +{"event": "data", "data": {"rate": 373759.9099472217, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08313082903623581}, "pipe": "data"} +{"event": "data", "data": {"rate": 368856.8306436097, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08317263424396515}, "pipe": "data"} +{"event": "data", "data": {"rate": 376142.9238422384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08357995748519897}, "pipe": "data"} +{"event": "data", "data": {"rate": 373656.0361630807, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08405735343694687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 371348.51300464355, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08272015303373337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 375528.5569022488, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08338482677936554}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 378121.14088361216, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08422383666038513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 366997.2852863163, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08405454456806183}, "pipe": "data"} +{"event": "data", "data": {"rate": 379590.9092425567, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08417406678199768}, "pipe": "data"} +{"event": "data", "data": {"rate": 368304.4939420048, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08274278044700623}, "pipe": "data"} +{"event": "data", "data": {"rate": 376821.2345941489, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08461865782737732}, "pipe": "data"} +{"event": "data", "data": {"rate": 377166.8823259815, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08379694819450378}, "pipe": "data"} +{"event": "data", "data": {"rate": 375817.76031607593, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08401922881603241}, "pipe": "data"} +{"event": "data", "data": {"rate": 377850.5414766152, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402208983898163}, "pipe": "data"} +{"event": "data", "data": {"rate": 378040.1672292429, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0.01, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08343112468719482}, "pipe": "data"} +{"event": "data", "data": {"rate": 372000.0485888735, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0.01, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5976.375, 24512.0], "load": 0, "temperature": null, "power": null}, "1": {"memory": [5754.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712623900.470933, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0.data new file mode 100644 index 000000000..7af368556 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0.data @@ -0,0 +1,253 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623534.662914, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712623537.0178125}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004446029663086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5626.375, 24512.0], "load": 0.35, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.573s, 7.28/s (17.573s, 7.28/s) LR: 1.000e-05 Data: 0.585 (0.585)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23512.375, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [13582.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11108.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10918.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10618.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006705284118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01315975189209}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036317348480225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8110.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.982916831970215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996298789978027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [22296.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 144.68807615526669, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995458602905273}, "pipe": "data"} +{"event": "data", "data": {"rate": 131.29344970804513, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995474815368652}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.7826801546604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24204.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 107.51861469146162, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97037935256958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.060278415679932}, "pipe": "data"} +{"event": "data", "data": {"rate": 171.1188057875757, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [16206.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003751754760742}, "pipe": "data"} +{"event": "data", "data": {"rate": 136.69792901313284, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.941399097442627}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.9522817675844, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 108.3922863171404, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24216.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961507320404053}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990056037902832}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.62301526721913, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029509544372559}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.7681942586302, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24366.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94654655456543}, "pipe": "data"} +{"event": "data", "data": {"rate": 170.66232478957426, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 107.81551083500315, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032044410705566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24422.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9754743576049805}, "pipe": "data"} +{"event": "data", "data": {"rate": 171.29246278175938, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979887008666992}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.487251266376, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.980226993560791}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15344.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.98081199491273, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 106.45916019767995, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979134559631348}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.729s, 175.55/s (1.385s, 92.41/s) LR: 1.000e-05 Data: 0.000 (0.038)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 168.21244220072643, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24446.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.867 (0.867) Loss: 6.9616 (6.9616) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 1.223 (0.302) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0/20240409-004541-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 175.47127320579483, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8358.375, 24512.0], "load": 0.81, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [8358.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7198.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020693778991699}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.296s, 98.77/s (1.296s, 98.77/s) LR: 1.001e-02 Data: 0.476 (0.476)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11702.375, 24512.0], "load": 0.51, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.960958480834961}, "pipe": "data"} +{"event": "data", "data": {"rate": 132.79107998992617, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 146.87649199048235, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003782272338867}, "pipe": "data"} +{"event": "data", "data": {"rate": 154.12925408812737, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15858.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034578800201416}, "pipe": "data"} +{"event": "data", "data": {"rate": 125.32590321875432, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061990261077881}, "pipe": "data"} +{"event": "data", "data": {"rate": 170.3416794985076, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 104.15959893606829, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24220.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.065667152404785}, "pipe": "data"} +{"event": "data", "data": {"rate": 132.51997500760237, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019161224365234}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.85865633898098, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24222.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061379432678223}, "pipe": "data"} +{"event": "data", "data": {"rate": 133.90740532970187, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077938556671143}, "pipe": "data"} +{"event": "data", "data": {"rate": 168.92309339876343, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 99.7747514053412, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.037508964538574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24316.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 144.70835321349125, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.999046325683594}, "pipe": "data"} +{"event": "data", "data": {"rate": 141.75601582491066, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 116.60003774616504, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.063668727874756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24392.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077404499053955}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.8169410721702, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 110.4658666551973, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036367416381836}, "pipe": "data"} +{"event": "data", "data": {"rate": 140.39447506543613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24402.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993802070617676}, "pipe": "data"} +{"event": "data", "data": {"rate": 143.0095466626196, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 112.85871090927047, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007924556732178}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.098423957824707}, "pipe": "data"} +{"event": "data", "data": {"rate": 170.36484490014465, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [15110.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 112.09772856530246, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043365478515625}, "pipe": "data"} +{"event": "data", "data": {"rate": 140.8971201876894, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1979475021362305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24198.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 141.75450732044862, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.147006034851074}, "pipe": "data"} +{"event": "data", "data": {"rate": 125.32773343040974, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981863021850586}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.9606144691258, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11964.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.755s, 169.50/s (0.909s, 140.88/s) LR: 1.001e-02 Data: 0.000 (0.035)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 106.75002776334706, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.659 (0.659) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.068 (0.261) Loss: 6.9392 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0/20240409-004541-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 169.48692602613613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24406.375, 24512.0], "load": 0.84, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24406.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24406.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995840072631836}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.491s, 85.87/s (1.491s, 85.87/s) LR: 2.001e-02 Data: 0.495 (0.495)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24272.375, 24512.0], "load": 0.53, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034093856811523}, "pipe": "data"} +{"event": "data", "data": {"rate": 141.82756283914355, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15028715133667}, "pipe": "data"} +{"event": "data", "data": {"rate": 166.4497209118674, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 106.89810855455488, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24152.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.148109436035156}, "pipe": "data"} +{"event": "data", "data": {"rate": 165.41324457517086, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019067764282227}, "pipe": "data"} +{"event": "data", "data": {"rate": 151.4865170129708, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.009698867797852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24364.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 133.6772144864222, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.025642395019531}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.09900989828867, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 104.23724990928713, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.160573482513428}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24392.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.11552486688353, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0108962059021}, "pipe": "data"} +{"event": "data", "data": {"rate": 151.56588464271024, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.101762771606445}, "pipe": "data"} +{"event": "data", "data": {"rate": 131.59627105434186, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141188621520996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7624.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.16766598496127, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 102.5111027633081, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052355766296387}, "pipe": "data"} +{"event": "data", "data": {"rate": 165.63165730081326, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.292587757110596}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [19852.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 148.08695207218898, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112408638000488}, "pipe": "data"} +{"event": "data", "data": {"rate": 132.39288173908304, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712623653.885088, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1.data new file mode 100644 index 000000000..ef395525a --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1.data @@ -0,0 +1,254 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623537.001089, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712623537.025442}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004453659057617}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5626.375, 24512.0], "load": 0.37, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 17.579s, 7.28/s (17.579s, 7.28/s) LR: 1.000e-05 Data: 0.597 (0.597)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23512.375, 24512.0], "load": 0.93, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13582.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11038.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10918.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10618.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006725788116455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013141632080078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [16210.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036336421966553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.982904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9962921142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [19750.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99544620513916}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.93248807748276, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 129.1306614863104, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9885573387146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24470.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 168.94909915069914, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.060260772705078}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.86194686864405, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0094709396362305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24148.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 152.98267604456046, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.941376686096191}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.91001271685465, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056238651275635}, "pipe": "data"} +{"event": "data", "data": {"rate": 168.32388235499852, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [17760.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 108.85460374429937, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9900665283203125}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.8606538571421, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.997450828552246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24162.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946535587310791}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.4970846455546, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 128.51496073280597, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032040119171143}, "pipe": "data"} +{"event": "data", "data": {"rate": 168.31691174697158, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24430.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990649223327637}, "pipe": "data"} +{"event": "data", "data": {"rate": 132.2175152901551, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.980195045471191}, "pipe": "data"} +{"event": "data", "data": {"rate": 174.88995392938844, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24434.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.6527772551722, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9791364669799805}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.729s, 175.49/s (1.334s, 95.94/s) LR: 1.000e-05 Data: 0.001 (0.035)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 175.75539788439644, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.864 (0.864) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 1.242 (0.301) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1/20240409-004541-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 175.452445595315, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7784.375, 24512.0], "load": 0.81, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7784.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7784.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [6628.375, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020674705505371}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.261s, 101.51/s (1.261s, 101.51/s) LR: 1.001e-02 Data: 0.447 (0.447)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 157.08403145219927, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0176897048950195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.68, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003789901733398}, "pipe": "data"} +{"event": "data", "data": {"rate": 171.26652733612843, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 115.46119741767964, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034580230712891}, "pipe": "data"} +{"event": "data", "data": {"rate": 141.8739121519906, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.062034606933594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [14934.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 156.76215623989577, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.065652847290039}, "pipe": "data"} +{"event": "data", "data": {"rate": 129.7413158045572, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019181251525879}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.05059022868997, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24202.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 105.44917240280712, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061359405517578}, "pipe": "data"} +{"event": "data", "data": {"rate": 149.53084341574348, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077919006347656}, "pipe": "data"} +{"event": "data", "data": {"rate": 148.3702948852656, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [11638.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.037487983703613}, "pipe": "data"} +{"event": "data", "data": {"rate": 130.77690977481686, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.999039173126221}, "pipe": "data"} +{"event": "data", "data": {"rate": 172.7643045105407, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24208.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 107.16275404302822, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.063638687133789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077393054962158}, "pipe": "data"} +{"event": "data", "data": {"rate": 173.11253852392164, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24190.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036355495452881}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.1555736898298, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993810653686523}, "pipe": "data"} +{"event": "data", "data": {"rate": 161.2342676671038, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 106.62639663117432, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007869243621826}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24460.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 171.4367936596502, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.098379135131836}, "pipe": "data"} +{"event": "data", "data": {"rate": 150.72752419516488, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043417930603027}, "pipe": "data"} +{"event": "data", "data": {"rate": 139.25212880726397, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24412.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.197956085205078}, "pipe": "data"} +{"event": "data", "data": {"rate": 164.70451497242468, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 106.06527356039646, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1470232009887695}, "pipe": "data"} +{"event": "data", "data": {"rate": 163.75250302897035, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981897354125977}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13198.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 144.6034434368278, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.752s, 170.13/s (0.888s, 144.16/s) LR: 1.001e-02 Data: 0.000 (0.034)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.687 (0.687) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.067 (0.261) Loss: 6.9395 (6.9699) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1/20240409-004541-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 140.305475448802, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.82, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24408.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995878219604492}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 1.488s, 86.01/s (1.488s, 86.01/s) LR: 2.001e-02 Data: 0.494 (0.494)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24154.375, 24512.0], "load": 0.51, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 111.41416305057494, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034097671508789}, "pipe": "data"} +{"event": "data", "data": {"rate": 162.86804178109608, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.150274276733398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24180.375, 24512.0], "load": 0.94, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.148129940032959}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.63683937233148, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019026756286621}, "pipe": "data"} +{"event": "data", "data": {"rate": 167.7525285656443, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [18326.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 106.25671577205398, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.009702205657959}, "pipe": "data"} +{"event": "data", "data": {"rate": 169.2617637858093, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.025614261627197}, "pipe": "data"} +{"event": "data", "data": {"rate": 147.8443292493536, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24196.375, 24512.0], "load": 0.96, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1605610847473145}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.3981708410155, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.010904312133789}, "pipe": "data"} +{"event": "data", "data": {"rate": 159.7444332677976, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 106.39428851055085, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.101785659790039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24294.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 168.3947192426414, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141188144683838}, "pipe": "data"} +{"event": "data", "data": {"rate": 149.41564957731674, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052373886108398}, "pipe": "data"} +{"event": "data", "data": {"rate": 138.55172088573545, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24388.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.292572975158691}, "pipe": "data"} +{"event": "data", "data": {"rate": 165.284024480878, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 107.28100103801494, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112317085266113}, "pipe": "data"} +{"event": "data", "data": {"rate": 165.56687318961033, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24458.375, 24512.0], "load": 0.95, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083057880401611}, "pipe": "data"} +{"event": "data", "data": {"rate": 145.01590332764957, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.190218448638916}, "pipe": "data"} +{"event": "data", "data": {"rate": 137.08163325423587, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24458.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 130.51220037123747, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712623655.5555615, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D0.data new file mode 100644 index 000000000..a65899a70 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D0.data @@ -0,0 +1,222 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622992.209855, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712622994.5857365}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 92.08166284929595, "units": "Tflops", "t": 1712622996.6545672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712622995.7894864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}, "t": 1712622996.2953954}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.46340304625043, "units": "Tflops", "t": 1712622997.3536675}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.26, "temperature": null, "power": null}}, "t": 1712622996.8009908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.39, "temperature": null, "power": null}}, "t": 1712622997.3064864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.47707859990244, "units": "Tflops", "t": 1712622998.0521567}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.59, "temperature": null, "power": null}}, "t": 1712622997.8122258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.27063050012336, "units": "Tflops", "t": 1712622998.7460208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.72, "temperature": null, "power": null}}, "t": 1712622998.3176758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.33750972713605, "units": "Tflops", "t": 1712622999.4454556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.92, "temperature": null, "power": null}}, "t": 1712622998.823255}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.3286662}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.02872880136583, "units": "Tflops", "t": 1712623000.1472428}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.8340993}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.65276098093757, "units": "Tflops", "t": 1712623000.8451257}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623000.3395743}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}, "t": 1712623000.8450837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.49708300157378, "units": "Tflops", "t": 1712623001.543527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.3505857}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.8620390214926, "units": "Tflops", "t": 1712623002.2465084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.8559945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.21987534073035, "units": "Tflops", "t": 1712623002.9469368}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.3616176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.8672562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.4229478896861, "units": "Tflops", "t": 1712623003.645765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.372737}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.79112194850264, "units": "Tflops", "t": 1712623004.349402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.8782525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.95172154878891, "units": "Tflops", "t": 1712623005.051886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.383736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.889268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.08531945543064, "units": "Tflops", "t": 1712623005.7532241}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.3947623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.88856898936464, "units": "Tflops", "t": 1712623006.4561307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.9005044}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.4059918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.7232324609725, "units": "Tflops", "t": 1712623007.1601987}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.9113808}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.32415085519008, "units": "Tflops", "t": 1712623007.867364}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.416881}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.75257457843477, "units": "Tflops", "t": 1712623008.5711486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.9224908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.4280367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.5307107343003, "units": "Tflops", "t": 1712623009.2767794}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.933456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.5784109409658, "units": "Tflops", "t": 1712623009.9818826}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.4390833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.9445107}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.56312766178485, "units": "Tflops", "t": 1712623010.6871383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.4499366}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.55199268141921, "units": "Tflops", "t": 1712623011.392611}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.9555047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.32701524311578, "units": "Tflops", "t": 1712623012.0996048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.460999}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.9666839}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.16349201889635, "units": "Tflops", "t": 1712623012.8078792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.4722576}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.21251390623921, "units": "Tflops", "t": 1712623013.5158713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.9777634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.483183}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.31511816795683, "units": "Tflops", "t": 1712623014.2230046}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.988673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85180387635134, "units": "Tflops", "t": 1712623014.9336193}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.4942677}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5612638274786, "units": "Tflops", "t": 1712623015.6466072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.999742}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623015.5052178}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.50547060603141, "units": "Tflops", "t": 1712623016.3599243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.0106668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.92130706707509, "units": "Tflops", "t": 1712623017.0700045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.5161417}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623017.0217223}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.58808583573641, "units": "Tflops", "t": 1712623017.7826748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623017.5271783}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.75298248214362, "units": "Tflops", "t": 1712623018.4941735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.0326412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.68127826127144, "units": "Tflops", "t": 1712623019.2060993}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.5382078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.0436742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.7250083712347, "units": "Tflops", "t": 1712623019.9177196}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.5492127}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.67662194482206, "units": "Tflops", "t": 1712623020.6296773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.0548851}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.5604424}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.68677331651891, "units": "Tflops", "t": 1712623021.341746}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.0659506}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.69127540101898, "units": "Tflops", "t": 1712623022.0535996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.5714943}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.54847774134608, "units": "Tflops", "t": 1712623022.766544}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.077117}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.582522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.30279362577772, "units": "Tflops", "t": 1712623023.4814177}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.088055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.52375148821281, "units": "Tflops", "t": 1712623024.1945548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.5934694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.0988743}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.40550219713224, "units": "Tflops", "t": 1712623024.9087641}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.6044054}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.25496976324989, "units": "Tflops", "t": 1712623025.6239767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.1099114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.6154466}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.08586117079452, "units": "Tflops", "t": 1712623026.340537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.120936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.22249996438542, "units": "Tflops", "t": 1712623027.055997}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.62638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.19742531544858, "units": "Tflops", "t": 1712623027.7716515}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.132516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.6378732}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.89743701403441, "units": "Tflops", "t": 1712623028.4896834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.143282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.86717035881608, "units": "Tflops", "t": 1712623029.20807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.6488526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.1543324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.8183833653662, "units": "Tflops", "t": 1712623029.918984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.6596901}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.64816650002453, "units": "Tflops", "t": 1712623030.6311603}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.1650763}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.00257922201564, "units": "Tflops", "t": 1712623031.3483407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.6706643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.1761262}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.5145328138153, "units": "Tflops", "t": 1712623032.0615726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.6814826}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.56993437018025, "units": "Tflops", "t": 1712623032.7743568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.186955}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.6925025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.24140715247712, "units": "Tflops", "t": 1712623033.48984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.1978936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.4382554150294, "units": "Tflops", "t": 1712623034.203636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.7033541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.44458647028648, "units": "Tflops", "t": 1712623034.917378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.208746}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.7142591}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.39041451155936, "units": "Tflops", "t": 1712623035.6315746}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.2196386}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.03957821822611, "units": "Tflops", "t": 1712623036.3484507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.7250361}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.2304409}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.33344048427077, "units": "Tflops", "t": 1712623037.0630863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.7359378}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.03293518162651, "units": "Tflops", "t": 1712623037.780027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.241323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.7466772}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.24017718095043, "units": "Tflops", "t": 1712623038.495559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.2522306}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.90543415773632, "units": "Tflops", "t": 1712623039.2134862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.757848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.87717567547645, "units": "Tflops", "t": 1712623039.9316294}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.263311}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.7686636}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.06052383637027, "units": "Tflops", "t": 1712623040.6483762}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.27405}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.9141654533217, "units": "Tflops", "t": 1712623041.3662302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.7793975}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.2849655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.47148845525015, "units": "Tflops", "t": 1712623042.0876143}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.7904794}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.91886760736999, "units": "Tflops", "t": 1712623042.8054354}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.2958736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.8012705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.84027662349271, "units": "Tflops", "t": 1712623043.5239134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.3066254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.80234116740215, "units": "Tflops", "t": 1712623044.242781}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.811981}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.41917551471167, "units": "Tflops", "t": 1712623044.9645212}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.317373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.8227198}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.80706234746496, "units": "Tflops", "t": 1712623045.6832542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.328158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.67881903652602, "units": "Tflops", "t": 1712623046.402969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.8335593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.3389757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.56526342379955, "units": "Tflops", "t": 1712623047.1236045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.8445172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.72425329101834, "units": "Tflops", "t": 1712623047.842945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.3498883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.59254204339663, "units": "Tflops", "t": 1712623048.5633183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.8554265}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.3607957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.5693844841155, "units": "Tflops", "t": 1712623049.2839131}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.8662121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.5658391378761, "units": "Tflops", "t": 1712623050.004513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.3716223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.8770056}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.56150630972388, "units": "Tflops", "t": 1712623050.7251828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.3823702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.5623243647558, "units": "Tflops", "t": 1712623051.4459584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.8878477}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.393243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.56438471627182, "units": "Tflops", "t": 1712623052.1665988}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.8986564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.19080156794898, "units": "Tflops", "t": 1712623052.8901527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.4040213}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.43799644415854, "units": "Tflops", "t": 1712623053.6117477}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.9094834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.4148254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37651726013047, "units": "Tflops", "t": 1712623054.3338902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.9203157}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.17076047220235, "units": "Tflops", "t": 1712623055.0576012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.4256735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.9310958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.09926126797762, "units": "Tflops", "t": 1712623055.7819135}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.4368086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.34888462246634, "units": "Tflops", "t": 1712623056.5042105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.9421837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.4476187}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.25130877824122, "units": "Tflops", "t": 1712623057.227316}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.9532337}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.00311557105228, "units": "Tflops", "t": 1712623057.9523687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.4586794}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.92724736926372, "units": "Tflops", "t": 1712623058.6781547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.9647615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.470128}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.9462249769573, "units": "Tflops", "t": 1712623059.4036999}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.9755013}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.01688529014208, "units": "Tflops", "t": 1712623060.1286366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.4808471}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.9862804}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712623060.8646693, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D1.data new file mode 100644 index 000000000..b6f7ac19c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp16.D1.data @@ -0,0 +1,222 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622994.575693, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712622994.586551}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 91.70418980853003, "units": "Tflops", "t": 1712622996.643229}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712622995.7396424}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}, "t": 1712622996.245979}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.51615960137482, "units": "Tflops", "t": 1712622997.3418105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712622996.7517054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.36, "temperature": null, "power": null}}, "t": 1712622997.2573478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.59464505128611, "units": "Tflops", "t": 1712622998.0393934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712622997.76337}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.09898952356647, "units": "Tflops", "t": 1712622998.7336943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712622998.2691383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.18757890113248, "units": "Tflops", "t": 1712622999.4344435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712622998.7750297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.280922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.40323252290072, "units": "Tflops", "t": 1712623000.133489}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712622999.7868829}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.34046882469497, "units": "Tflops", "t": 1712623000.8333347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623000.292938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623000.798655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.8998798363642, "units": "Tflops", "t": 1712623001.5362039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.304783}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.06641635845962, "units": "Tflops", "t": 1712623002.2376907}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623001.8106058}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.87576393945541, "units": "Tflops", "t": 1712623002.940616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.3167546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623002.8227825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.77777137422436, "units": "Tflops", "t": 1712623003.6445663}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.3286362}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.69295694193272, "units": "Tflops", "t": 1712623004.348812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623003.8346548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.3405373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.74447508079143, "units": "Tflops", "t": 1712623005.0528493}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623004.8462758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.8119500753148, "units": "Tflops", "t": 1712623005.756415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.3524656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.33444458502044, "units": "Tflops", "t": 1712623006.4635222}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623005.8584332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.3644679}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.72158171886194, "units": "Tflops", "t": 1712623007.1677718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623006.870605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.49215595170975, "units": "Tflops", "t": 1712623007.873912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.3766305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.32581910369925, "units": "Tflops", "t": 1712623008.5813255}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623007.8826532}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.3885982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.98569568958946, "units": "Tflops", "t": 1712623009.2911797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623008.8941534}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.38534848607807, "units": "Tflops", "t": 1712623009.997979}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.40012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623009.905947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.15398862214593, "units": "Tflops", "t": 1712623010.7067323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.4120927}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.9530218358542, "units": "Tflops", "t": 1712623011.4169042}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623010.9180255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.11962948357804, "units": "Tflops", "t": 1712623012.1258833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.4240026}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623011.9298837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85840984437414, "units": "Tflops", "t": 1712623012.8369062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.4358428}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.86127687768717, "units": "Tflops", "t": 1712623013.5477648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623012.9414716}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.4476354}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.75167664817671, "units": "Tflops", "t": 1712623014.2595801}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623013.9536762}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.80151094635453, "units": "Tflops", "t": 1712623014.9709868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.4597566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623014.9664018}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.64019466266264, "units": "Tflops", "t": 1712623015.6835904}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623015.472422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.55504061789287, "units": "Tflops", "t": 1712623016.396877}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623015.9784062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.62459616436136, "units": "Tflops", "t": 1712623017.1096509}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.4845476}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623016.9904583}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.47137179405848, "units": "Tflops", "t": 1712623017.823562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623017.4962974}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.25487748711814, "units": "Tflops", "t": 1712623018.5390491}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.0020373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623018.5082588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.49545166494232, "units": "Tflops", "t": 1712623019.2527041}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.0141184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.64875591211539, "units": "Tflops", "t": 1712623019.965068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623019.5202656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.51403790339505, "units": "Tflops", "t": 1712623020.678609}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.0262325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623020.5323527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.69652321009468, "units": "Tflops", "t": 1712623021.3907018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.0382962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.52919693743462, "units": "Tflops", "t": 1712623022.1040637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623021.5442038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.050078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.20049745596445, "units": "Tflops", "t": 1712623022.820127}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623022.555964}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.14369597757768, "units": "Tflops", "t": 1712623023.536554}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.0618203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.61693838728154, "units": "Tflops", "t": 1712623024.2492573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623023.5677712}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.0737603}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.99881672318124, "units": "Tflops", "t": 1712623024.9668403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623024.5797675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.74301754868569, "units": "Tflops", "t": 1712623025.6864083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.085748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623025.5916467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.17466713180531, "units": "Tflops", "t": 1712623026.4026227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.0977418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.36096298855833, "units": "Tflops", "t": 1712623027.1173947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623026.6039402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.1099854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.1595012502076, "units": "Tflops", "t": 1712623027.833589}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623027.6155696}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.90381630262677, "units": "Tflops", "t": 1712623028.551854}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.1210506}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.11854148551038, "units": "Tflops", "t": 1712623029.2685163}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623028.6266153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.1333055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.14995605849059, "units": "Tflops", "t": 1712623029.9848218}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623029.6388092}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.99740968886032, "units": "Tflops", "t": 1712623030.7023852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.144329}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623030.6499572}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.61758940409794, "units": "Tflops", "t": 1712623031.4152353}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.155502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.13956472857632, "units": "Tflops", "t": 1712623032.1238508}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623031.661176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.73955280271805, "units": "Tflops", "t": 1712623032.8357196}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.1667213}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623032.6724074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.45752922763144, "units": "Tflops", "t": 1712623033.5496342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.177893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.75829947149221, "units": "Tflops", "t": 1712623034.26137}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623033.6834056}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.1889622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.85844100682803, "units": "Tflops", "t": 1712623034.972308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623034.694549}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.13777423297981, "units": "Tflops", "t": 1712623035.6888282}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.2001467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.34598230103173, "units": "Tflops", "t": 1712623036.403688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623035.705657}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.2113352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.39248144917126, "units": "Tflops", "t": 1712623037.118279}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623036.716905}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.3913091448458, "units": "Tflops", "t": 1712623037.8328185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.222537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623037.728182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.98924362208147, "units": "Tflops", "t": 1712623038.5503223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.2337997}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.66572895668912, "units": "Tflops", "t": 1712623039.2705257}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623038.739361}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.2448957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.30636547817907, "units": "Tflops", "t": 1712623039.9857078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623039.7505426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.99777673714256, "units": "Tflops", "t": 1712623040.7031956}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.256156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.78714531089516, "units": "Tflops", "t": 1712623041.4223876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623040.761651}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.2672946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.94220229247908, "units": "Tflops", "t": 1712623042.1403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623041.7727866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.12357128514782, "units": "Tflops", "t": 1712623042.8567107}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.2783692}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623042.7839527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.9027784633014, "units": "Tflops", "t": 1712623043.575108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.2895126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.74824979612652, "units": "Tflops", "t": 1712623044.2946892}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623043.795011}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.88614570804944, "units": "Tflops", "t": 1712623045.0131676}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.3006394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623044.806173}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.80179293279893, "units": "Tflops", "t": 1712623045.7322848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.3119488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.82299610696131, "units": "Tflops", "t": 1712623046.4512634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623045.817501}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.323064}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.58781258492799, "units": "Tflops", "t": 1712623047.1720521}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623046.8287315}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.22555633694935, "units": "Tflops", "t": 1712623047.8957145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.3342905}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623047.8397963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.57247552279765, "units": "Tflops", "t": 1712623048.6167274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.3453534}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.78090398193555, "units": "Tflops", "t": 1712623049.335958}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623048.8509364}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.37135749649583, "units": "Tflops", "t": 1712623050.0584745}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.3569517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623049.8625834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.44615559451591, "units": "Tflops", "t": 1712623050.7804582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.3682342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.45652283747489, "units": "Tflops", "t": 1712623051.502238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623050.8737676}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.3793852}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.13982975192194, "units": "Tflops", "t": 1712623052.2264097}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623051.8849592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.16168928973029, "units": "Tflops", "t": 1712623052.9505439}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.390643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623052.8961968}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.6268452303057, "units": "Tflops", "t": 1712623053.671098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.4017353}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.2917417466255, "units": "Tflops", "t": 1712623054.3942606}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623053.9074473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.29110923532329, "units": "Tflops", "t": 1712623055.1172082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.4130347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623054.918565}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.73091265929129, "units": "Tflops", "t": 1712623055.8367677}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.424327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.222759341934, "units": "Tflops", "t": 1712623056.5604103}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623055.9298296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.4354153}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.30481560947548, "units": "Tflops", "t": 1712623057.283447}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623056.9409916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.2253458044737, "units": "Tflops", "t": 1712623058.0070715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.446526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623057.9523103}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.16201966536177, "units": "Tflops", "t": 1712623058.7313108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.4579241}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.92387108174415, "units": "Tflops", "t": 1712623059.4574041}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623058.964685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.16382175641448, "units": "Tflops", "t": 1712623060.1815817}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.47033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3078.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623059.9759045}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712623060.8045602, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D0.data new file mode 100644 index 000000000..22fa0eca6 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D0.data @@ -0,0 +1,350 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623143.168964, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623145.5541213}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 15.62976628799767, "units": "Tflops", "t": 1712623148.2794623}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2390.625, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623146.7343583}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.07, "temperature": null, "power": null}}, "t": 1712623147.240455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.27, "temperature": null, "power": null}}, "t": 1712623147.7458909}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.4, "temperature": null, "power": null}}, "t": 1712623148.2515116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.586012522474478, "units": "Tflops", "t": 1712623149.6911802}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.6, "temperature": null, "power": null}}, "t": 1712623148.757368}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.73, "temperature": null, "power": null}}, "t": 1712623149.2630699}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.550587168804276, "units": "Tflops", "t": 1712623151.106653}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.93, "temperature": null, "power": null}}, "t": 1712623149.7688088}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.2747898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.7805328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.73362784750848, "units": "Tflops", "t": 1712623152.5054889}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.2865005}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.7925987}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.29853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.694724377272777, "units": "Tflops", "t": 1712623153.9070761}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.80427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.3103147}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.8162634}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.733724468877764, "units": "Tflops", "t": 1712623155.3054283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.322284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.8281431}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.688925859428226, "units": "Tflops", "t": 1712623156.7076564}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.3340673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.839721}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.3455276}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.626366234271808, "units": "Tflops", "t": 1712623158.1155984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.8512418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.356838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.8628244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.657126390428092, "units": "Tflops", "t": 1712623159.5207253}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.3689232}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.8746297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.3802693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.614448609162695, "units": "Tflops", "t": 1712623160.9294395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.8861175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.3921404}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.8977985}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.581536373901878, "units": "Tflops", "t": 1712623162.3412306}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.403583}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.9096243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.559776852572508, "units": "Tflops", "t": 1712623163.7550926}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.4156017}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.921646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.427552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.592093669920724, "units": "Tflops", "t": 1712623165.1661284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.9334157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.439304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.9452693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.52491997886349, "units": "Tflops", "t": 1712623166.5832126}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.451152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.956867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.4625702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.541646641821664, "units": "Tflops", "t": 1712623167.998523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.9682386}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.4739873}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.9796324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.500848179416678, "units": "Tflops", "t": 1712623169.4174123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.4855387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.9913487}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.506722256695717, "units": "Tflops", "t": 1712623170.8357658}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623169.497226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.0032716}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.509203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.471931121826229, "units": "Tflops", "t": 1712623172.2573366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623171.0151463}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623171.5208285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.0266166}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.526440997535323, "units": "Tflops", "t": 1712623173.6738944}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.5324461}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.0385096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.5443}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.4967306407131, "units": "Tflops", "t": 1712623175.093186}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.0498538}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.5555212}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.0612988}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.478928808114802, "units": "Tflops", "t": 1712623176.5143802}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.5671601}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.0731006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.432643797985213, "units": "Tflops", "t": 1712623177.939771}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.5791144}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.0853786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.5909264}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.488538535492522, "units": "Tflops", "t": 1712623179.3601747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.0968945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.6029475}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.108967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.40091960445591, "units": "Tflops", "t": 1712623180.7887197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.6151142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.1212611}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.62712}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.459190567868566, "units": "Tflops", "t": 1712623182.2117493}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.133025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.6387794}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.1447358}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.430849373271085, "units": "Tflops", "t": 1712623183.6374407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.650596}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.1563785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.428967612108977, "units": "Tflops", "t": 1712623185.0632534}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.662359}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.168422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.6743152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.374528846881462, "units": "Tflops", "t": 1712623186.4939604}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.1799312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.685834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.191722}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.384437869935564, "units": "Tflops", "t": 1712623187.9239912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.6976998}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.2035174}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.7092693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.429584490095262, "units": "Tflops", "t": 1712623189.3495817}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.2151525}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.7207992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.226558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.383814332964795, "units": "Tflops", "t": 1712623190.7792835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.7324278}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.238201}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.744012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.362196325471512, "units": "Tflops", "t": 1712623192.2109926}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.250554}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.7565503}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.406378475928777, "units": "Tflops", "t": 1712623193.63856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.2626312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.7685645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.2743196}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.34377288348282, "units": "Tflops", "t": 1712623195.0719833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.7801118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.2857714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.7915013}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.351786975217053, "units": "Tflops", "t": 1712623196.5046751}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.2972264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.8029387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.3089697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.416807899838041, "units": "Tflops", "t": 1712623197.9313083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.8151894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.3209934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.8266954}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.390809567405666, "units": "Tflops", "t": 1712623199.3603685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.3324423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.8379476}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.3437696}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.35338670905245, "units": "Tflops", "t": 1712623200.792884}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.849249}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.3551397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.362725991615843, "units": "Tflops", "t": 1712623202.2245295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.861123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.366894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.8725817}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.359592025282396, "units": "Tflops", "t": 1712623203.6564894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.378284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.883945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.3896947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.395120257286225, "units": "Tflops", "t": 1712623205.0851436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.895643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.4014237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.9073312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.282803367242195, "units": "Tflops", "t": 1712623206.5242908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.4131105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.9189234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.4245973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.315646913198467, "units": "Tflops", "t": 1712623207.9603505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.9303753}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.4361382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.9418938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.358632903440135, "units": "Tflops", "t": 1712623209.3923862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.4477549}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.9536421}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.36454043895332, "units": "Tflops", "t": 1712623210.823848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.4595373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.9653819}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.4713094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.299796293253856, "units": "Tflops", "t": 1712623212.261403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.977123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.4830146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.9886806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.337105963196048, "units": "Tflops", "t": 1712623213.6954606}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623212.4945838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.00027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.5060067}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.341891883288518, "units": "Tflops", "t": 1712623215.1290634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.0117872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.5175831}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.0232813}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.344089406458691, "units": "Tflops", "t": 1712623216.5624666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.5289412}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623216.0344772}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623216.5403383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.314095716717055, "units": "Tflops", "t": 1712623217.999008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.0462832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.5522556}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.311352648335134, "units": "Tflops", "t": 1712623219.435748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.0581934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.5641136}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.0701606}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.31543328715715, "units": "Tflops", "t": 1712623220.8721755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.576176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.0822496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.5882049}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.293078775420465, "units": "Tflops", "t": 1712623222.3107188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.094266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.6001718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.1060634}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.284300107141183, "units": "Tflops", "t": 1712623223.749946}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.6119094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.1178586}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.623626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.323820039277066, "units": "Tflops", "t": 1712623225.185607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.1293428}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.635378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.1424954}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.290028920835352, "units": "Tflops", "t": 1712623226.6244035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.648441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.1544676}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.284193730161883, "units": "Tflops", "t": 1712623228.063738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.6603825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.166513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.6724968}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.275503682675145, "units": "Tflops", "t": 1712623229.5039146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.1782725}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.6840498}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.190034}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.267518471751721, "units": "Tflops", "t": 1712623230.9446933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.695949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.2019284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.7078245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.281334770426842, "units": "Tflops", "t": 1712623232.3841107}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.2139034}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.719584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.2252324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.265165970090512, "units": "Tflops", "t": 1712623233.8249192}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.7312286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.2370214}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.7425423}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.19924334263539, "units": "Tflops", "t": 1712623235.2719972}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.2485018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.7541943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.2598674}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.256744856502193, "units": "Tflops", "t": 1712623236.7135925}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.7656553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.2714305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.254902793578506, "units": "Tflops", "t": 1712623238.1553519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.7771225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.2827394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.7885091}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.260690386193854, "units": "Tflops", "t": 1712623239.5965753}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.2945452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.8003397}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.3062758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.229791720160234, "units": "Tflops", "t": 1712623241.04074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.8119082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.3177161}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.8235116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.191960686733216, "units": "Tflops", "t": 1712623242.4884946}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.3295557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.835622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.3413818}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.19265635567003, "units": "Tflops", "t": 1712623243.9361951}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.847141}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.3528843}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.8586073}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.210515292695826, "units": "Tflops", "t": 1712623245.3821833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.3647761}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.870404}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.3761282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.258466199476299, "units": "Tflops", "t": 1712623246.8236322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.8823876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.3912947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.212128369903493, "units": "Tflops", "t": 1712623248.2694557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.8974283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.403174}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.908928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.2003880727743, "units": "Tflops", "t": 1712623249.7163806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.4153736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.9210052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.4267824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.229012181487855, "units": "Tflops", "t": 1712623251.1606236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.9324074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.4380903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.943781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.197718138691217, "units": "Tflops", "t": 1712623252.607814}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.4494083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.9551082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.4609704}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.204261895836751, "units": "Tflops", "t": 1712623254.0543878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.9668636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.4725225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.9783337}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.161302048780465, "units": "Tflops", "t": 1712623255.5050437}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.48428}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.9902194}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623255.4959342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.203484968401295, "units": "Tflops", "t": 1712623256.9516847}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.001578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.5074155}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.170848205617109, "units": "Tflops", "t": 1712623258.401399}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.013132}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.5185776}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.0244126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.133170030632277, "units": "Tflops", "t": 1712623259.8547711}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.5300646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.0357366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.541619}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.181928122329465, "units": "Tflops", "t": 1712623261.3040497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.047471}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.5530705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623261.0590227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.20911071409159, "units": "Tflops", "t": 1712623262.75057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623261.564893}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.070633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.576513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.146754084236033, "units": "Tflops", "t": 1712623264.2028444}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.0828197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.5887723}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.0945861}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.155447635143537, "units": "Tflops", "t": 1712623265.6541693}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.6004422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.106242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.6119308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.16745779430806, "units": "Tflops", "t": 1712623267.1042833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.1177864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.6235235}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.185847546784824, "units": "Tflops", "t": 1712623268.5525684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.1294527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.635274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.141172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.104499064182402, "units": "Tflops", "t": 1712623270.0087087}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.6468441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.1524715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.658107}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.15407561625503, "units": "Tflops", "t": 1712623271.4600737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.1639347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.669637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.1754088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.141306103482664, "units": "Tflops", "t": 1712623272.9126792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.681272}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.187195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.692953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.200753821591542, "units": "Tflops", "t": 1712623274.359585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.1988301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.7046325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623274.2107391}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.171614314160282, "units": "Tflops", "t": 1712623275.809299}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623274.7165935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623275.22216}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623275.7278378}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623276.6227868, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D1.data new file mode 100644 index 000000000..6e5406099 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/fp32.D1.data @@ -0,0 +1,346 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623145.544499, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623145.5546074}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 15.830753257814223, "units": "Tflops", "t": 1712623148.24809}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623146.7253928}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.11, "temperature": null, "power": null}}, "t": 1712623147.2318418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.24, "temperature": null, "power": null}}, "t": 1712623147.7376459}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.44, "temperature": null, "power": null}}, "t": 1712623148.2433505}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.813650998681492, "units": "Tflops", "t": 1712623149.6394134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.57, "temperature": null, "power": null}}, "t": 1712623148.7488172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}, "t": 1712623149.2544632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.733010572325135, "units": "Tflops", "t": 1712623151.0383995}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.91, "temperature": null, "power": null}}, "t": 1712623149.7599366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.265528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623150.7711902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.790214223039595, "units": "Tflops", "t": 1712623152.4320095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.276876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623151.782587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.2880752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.758805578994897, "units": "Tflops", "t": 1712623153.8279066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623152.7937145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.2992947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623153.8047743}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.93276144377986, "units": "Tflops", "t": 1712623155.208458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.3104286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623154.8159285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.918327821233802, "units": "Tflops", "t": 1712623156.5902038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.321662}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623155.8271058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.3327506}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.889380121856924, "units": "Tflops", "t": 1712623157.974518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623156.8381813}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.343793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623157.849403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.889558049080742, "units": "Tflops", "t": 1712623159.3587966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.355114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623158.8607788}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.847089293972655, "units": "Tflops", "t": 1712623160.746783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.366521}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623159.8720303}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.3775032}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.869644016509366, "units": "Tflops", "t": 1712623162.1328228}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623160.8830214}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.3886502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623161.8943083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.810926630445298, "units": "Tflops", "t": 1712623163.5240066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.3999226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623162.9054134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.4109137}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.834794689651531, "units": "Tflops", "t": 1712623164.913068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623163.9165475}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.4222558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.772654533210805, "units": "Tflops", "t": 1712623166.3074517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623164.9277878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.4333866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623165.93908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.79944039871084, "units": "Tflops", "t": 1712623167.6996546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.444576}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623166.9502096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.4557748}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.774523943206438, "units": "Tflops", "t": 1712623169.0939057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623167.961307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.466913}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623168.9723773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.780947514776013, "units": "Tflops", "t": 1712623170.4877057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623169.477937}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623169.9836886}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.755123089906787, "units": "Tflops", "t": 1712623171.8836446}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.489462}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623170.9949713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623171.500583}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.696922633536783, "units": "Tflops", "t": 1712623173.2849305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.006124}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623172.511928}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.0176132}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.736790129465028, "units": "Tflops", "t": 1712623174.6825275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623173.5232594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.028925}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623174.5345845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.694187594136698, "units": "Tflops", "t": 1712623176.084023}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.0402188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623175.5459206}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.0516582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.711473881992692, "units": "Tflops", "t": 1712623177.4838676}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623176.5576727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.0633729}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.704162800555432, "units": "Tflops", "t": 1712623178.8843403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623177.5688844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.0744274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623178.5799644}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.721688145723203, "units": "Tflops", "t": 1712623180.283432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.08547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623179.591106}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.096673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.710328484479927, "units": "Tflops", "t": 1712623181.6833708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623180.6022959}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.107825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623181.6135902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.619062712596074, "units": "Tflops", "t": 1712623183.0915024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.1191628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623182.6250367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.67252263206669, "units": "Tflops", "t": 1712623184.4949121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.1305823}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623183.6361423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.1418238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.676323802805438, "units": "Tflops", "t": 1712623185.8978972}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623184.647892}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.1533742}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623185.6588516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.654009324910147, "units": "Tflops", "t": 1712623187.3028858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.1644528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623186.6700466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.1755335}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.63326321308276, "units": "Tflops", "t": 1712623188.7097397}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623187.6811895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.186874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623188.6923883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.678388981501294, "units": "Tflops", "t": 1712623190.1126864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.1979895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623189.7034874}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.635075871650029, "units": "Tflops", "t": 1712623191.5193415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.2090003}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623190.7145424}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.2200243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.61080684771873, "units": "Tflops", "t": 1712623192.9282072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623191.7256114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.2311904}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623192.736744}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.624396786506093, "units": "Tflops", "t": 1712623194.3358612}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.2422888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623193.74783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.2533374}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.614141980295074, "units": "Tflops", "t": 1712623195.7445538}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623194.7590206}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.2646923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.626339759907875, "units": "Tflops", "t": 1712623197.151991}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623195.7701702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.2756886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623196.7811859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.570683182981112, "units": "Tflops", "t": 1712623198.5644994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.286708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623197.7922149}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.297896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.595394440963721, "units": "Tflops", "t": 1712623199.9747574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623198.803401}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.3093035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623199.8148751}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.566951456049187, "units": "Tflops", "t": 1712623201.3875868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.3203468}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623200.8259552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.3316383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.596219852097688, "units": "Tflops", "t": 1712623202.7979288}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623201.8372936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.342946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.528874724521797, "units": "Tflops", "t": 1712623204.2141905}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623202.8485167}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.3540194}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623203.8595793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.577212750429569, "units": "Tflops", "t": 1712623205.6261013}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.365453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623204.8712575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.3769147}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.60313247249985, "units": "Tflops", "t": 1712623207.0356576}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623205.882485}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.388057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623206.8936353}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.573430561653854, "units": "Tflops", "t": 1712623208.4479055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.399133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623207.905015}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.4105642}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.559750603360582, "units": "Tflops", "t": 1712623209.8615203}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623208.9161415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.4217176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.57211064726882, "units": "Tflops", "t": 1712623211.2738624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623209.9272287}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.4329376}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623210.9386115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.551048623864046, "units": "Tflops", "t": 1712623212.6881452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.44413}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623211.9496784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623212.455155}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.54202375991057, "units": "Tflops", "t": 1712623214.103246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623212.9607673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.4664302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623213.9720306}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.573285938605563, "units": "Tflops", "t": 1712623215.5155063}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.4778936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623214.9834533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.488975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.518998163731826, "units": "Tflops", "t": 1712623216.932707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623215.9944527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623216.500205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.495915726409207, "units": "Tflops", "t": 1712623218.3521576}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.005836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623217.5114572}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.016993}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.563583925986906, "units": "Tflops", "t": 1712623219.7653084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623218.52249}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.0279727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623219.5337174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.489204406016038, "units": "Tflops", "t": 1712623221.1852386}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.0392148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623220.5447636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.0503793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.514784855393128, "units": "Tflops", "t": 1712623222.6028166}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623221.5559273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.061486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623222.5670536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.510362560666776, "units": "Tflops", "t": 1712623224.0208066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.0726426}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623223.578161}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.518883272443889, "units": "Tflops", "t": 1712623225.437979}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.083624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623224.5892918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.09485}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.528723084443982, "units": "Tflops", "t": 1712623226.8544161}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623225.6008012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.106304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623226.6118894}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.479074281715134, "units": "Tflops", "t": 1712623228.275269}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.1175585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623227.6230993}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.128695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.51951258422151, "units": "Tflops", "t": 1712623229.69242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623228.6343675}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.1400478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623229.6458056}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.421460625539261, "units": "Tflops", "t": 1712623231.1186302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.1517532}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623230.6574292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.510519059031894, "units": "Tflops", "t": 1712623232.5365663}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.1629505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623231.6685958}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.1741056}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.470931968813108, "units": "Tflops", "t": 1712623233.9581635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623232.6796753}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.1852999}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623233.690853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.472021960317829, "units": "Tflops", "t": 1712623235.379665}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.1963584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623234.7018635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.2075095}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.475053982412833, "units": "Tflops", "t": 1712623236.8008847}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623235.713012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.2185848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623236.7241063}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.474830693682982, "units": "Tflops", "t": 1712623238.2221272}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.229696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623237.7352273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.444334548761264, "units": "Tflops", "t": 1712623239.6463091}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.240757}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623238.7462344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.251835}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.497063921095984, "units": "Tflops", "t": 1712623241.065512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623239.7573059}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.262966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623240.768433}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.465696960114222, "units": "Tflops", "t": 1712623242.4875984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.273931}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623241.7797134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.2860172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.445045764513623, "units": "Tflops", "t": 1712623243.9115763}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623242.791773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.2974327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623243.8031073}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.466041873931063, "units": "Tflops", "t": 1712623245.333633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.308731}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623244.8142886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.3197942}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.434498044877511, "units": "Tflops", "t": 1712623246.7585943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623245.827237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.332962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.438099345601591, "units": "Tflops", "t": 1712623248.1831894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623246.8385482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.344074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623247.849558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.449055660510982, "units": "Tflops", "t": 1712623249.6068113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.3550918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623248.860726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.366241}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.431166917325502, "units": "Tflops", "t": 1712623251.0322201}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623249.8717835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.3772936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623250.8828423}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.444965587825738, "units": "Tflops", "t": 1712623252.4562101}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.388423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623251.8940408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.3997426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.470241720290552, "units": "Tflops", "t": 1712623253.8778777}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623252.9055157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.411234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.425138393941737, "units": "Tflops", "t": 1712623255.3036613}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623253.9167042}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.4224048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623254.9279134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.42167464112951, "units": "Tflops", "t": 1712623256.7298112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623255.4333863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623255.9389758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.4445899}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.452415227252208, "units": "Tflops", "t": 1712623258.1531146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623256.9500868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.4555774}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623257.961059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.451281404270627, "units": "Tflops", "t": 1712623259.5765262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.466579}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623258.9721086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.4776971}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.445992429633431, "units": "Tflops", "t": 1712623261.0004308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623259.983253}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.4890084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623260.9945345}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.453518146226935, "units": "Tflops", "t": 1712623262.4236379}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623261.5001075}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.0058057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.409335908059571, "units": "Tflops", "t": 1712623263.8508973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623262.5113723}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.0169685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623263.522537}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.430072349038442, "units": "Tflops", "t": 1712623265.27642}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.028117}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623264.5336072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.0389261}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.446336465105006, "units": "Tflops", "t": 1712623266.7003036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623265.5444288}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.0499384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623266.5555406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.45413181015789, "units": "Tflops", "t": 1712623268.123451}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.0611815}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623267.5666897}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.07232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.453538859826452, "units": "Tflops", "t": 1712623269.5466483}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623268.577844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.083415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.43428625598157, "units": "Tflops", "t": 1712623270.9715965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623269.5890028}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.0945287}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623270.6000333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.443773381110223, "units": "Tflops", "t": 1712623272.3957005}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.1056244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623271.6111553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.1166632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 15.385887852953305, "units": "Tflops", "t": 1712623273.825148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623272.6222053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.1277604}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623273.633431}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712623274.5361629, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D0.data new file mode 100644 index 000000000..aa155b372 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D0.data @@ -0,0 +1,32 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622931.394082, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622933.7213192}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"} +{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"} +{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 169.62 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622989.8527756, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D1.data new file mode 100644 index 000000000..3a7487f6a --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/llama.D1.data @@ -0,0 +1,32 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712622933.712586, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622933.7219312}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 231, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 227, in main\n", "pipe": "stderr"} +{"event": "line", "data": " return huggingface_main(args, model, config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py\", line 143, in huggingface_main\n", "pipe": "stderr"} +{"event": "line", "data": " model = LlamaForCausalLM(LlamaConfig.from_dict(config)).cuda()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2243, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return super().cuda(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in cuda\n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 810, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " module._apply(fn)\n", "pipe": "stderr"} +{"event": "line", "data": " [Previous line repeated 2 more times]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 833, in _apply\n", "pipe": "stderr"} +{"event": "line", "data": " param_applied = fn(param)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 918, in \n", "pipe": "stderr"} +{"event": "line", "data": " return self._apply(lambda t: t.cuda(device))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 172.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 169.62 MiB is free. Including non-PyTorch memory, this process has 21.43 GiB memory in use. Of the allocated memory 21.20 GiB is allocated by PyTorch, and 9.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712622989.2747521, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b-multinode.data new file mode 100644 index 000000000..3b45f0015 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-1_3b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b-multinode.data new file mode 100644 index 000000000..cccd5c098 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/opt-6_7b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D0.data new file mode 100644 index 000000000..276fe735e --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D0.data @@ -0,0 +1,47 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623454.950918, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623457.3343341}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23536.375, 24512.0], "load": 0.47, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"} +{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"} +{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623461.6970937, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D1.data new file mode 100644 index 000000000..2db78ffe5 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/reformer.D1.data @@ -0,0 +1,47 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623457.317218, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623457.3427749}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23536.375, 24512.0], "load": 0.43, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 59, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self.amp_scaler.scale(loss).backward()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/function.py\", line 288, in apply\n", "pipe": "stderr"} +{"event": "line", "data": " return user_fn(self, *args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1677, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " output = layer.backward_pass(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/reformer/modeling_reformer.py\", line 1564, in backward_pass\n", "pipe": "stderr"} +{"event": "line", "data": " output.backward(grad_attn_output, retain_graph=True)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/_tensor.py\", line 492, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " torch.autograd.backward(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py\", line 251, in backward\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.50 GiB. GPU 0 has a total capacty of 23.73 GiB of which 975.62 MiB is free. Including non-PyTorch memory, this process has 20.65 GiB memory in use. Of the allocated memory 18.80 GiB is allocated by PyTorch, and 1.56 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712623461.637022, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D0.data new file mode 100644 index 000000000..0b8f0aae6 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D0.data @@ -0,0 +1,79 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623396.834406, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623399.244224}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24360.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24430.375, 24512.0], "load": 0.12, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623407.6523812, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D1.data new file mode 100644 index 000000000..6ad3f4eba --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/regnet_y_128gf.D1.data @@ -0,0 +1,79 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623399.225575, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623399.2513795}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [10386.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24430.375, 24512.0], "load": 0.11, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 224, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 218, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py\", line 51, in train_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(inp)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 378, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.trunk_output(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/regnet.py\", line 147, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = x + self.f(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 81.62 MiB is free. Including non-PyTorch memory, this process has 21.52 GiB memory in use. Of the allocated memory 20.37 GiB is allocated by PyTorch, and 892.38 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712623407.8203948, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0.data new file mode 100644 index 000000000..cb4865bd4 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0.data @@ -0,0 +1,188 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623494.63799, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712623494.654947}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.2) calculated from base learning rate (0.1) and global batch size (512) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 221.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 1 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.87 GiB is allocated by PyTorch, and 221.64 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [14606.375, 24512.0], "load": 0.26, "temperature": null, "power": null}, "1": {"memory": [14516.375, 24512.0], "load": 0.25, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24470.375, 24512.0], "load": 0.44, "temperature": null, "power": null}, "1": {"memory": [24378.375, 24512.0], "load": 0.4, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 41.62 MiB is free. Including non-PyTorch memory, this process has 21.56 GiB memory in use. Of the allocated memory 20.84 GiB is allocated by PyTorch, and 338.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", "pipe": "stderr"} +{"event": "line", "data": " return _run_code(code, main_globals, None,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/runpy.py\", line 86, in _run_code\n", "pipe": "stderr"} +{"event": "line", "data": " exec(code, run_globals)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/__main__.py\", line 4, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1519, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " else self._run_ddp_forward(*inputs, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/parallel/distributed.py\", line 1355, in _run_ddp_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.module(*inputs, **kwargs) # type: ignore[index]\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 480, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.bn2(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py\", line 171, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.batch_norm(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 2478, in batch_norm\n", "pipe": "stderr"} +{"event": "line", "data": " return torch.batch_norm(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 26.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 41.62 MiB is free. Including non-PyTorch memory, this process has 21.56 GiB memory in use. Of the allocated memory 20.84 GiB is allocated by PyTorch, and 338.14 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-09 00:45:05,663] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 50233) of binary: /mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"} +{"event": "line", "data": " run(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"} +{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"} +{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "line", "data": "voir FAILED\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Failures:\n", "pipe": "stderr"} +{"event": "line", "data": "[1]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-09_00:45:05\n", "pipe": "stderr"} +{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 1 (local_rank: 1)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 50234)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"} +{"event": "line", "data": "[0]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-09_00:45:05\n", "pipe": "stderr"} +{"event": "line", "data": " host : delicatemastodon.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 50233)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712623505.916635, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0.data new file mode 100644 index 000000000..0c6cc0aba --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623482.010782, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712623484.358363}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24458.375, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24458.375, 24512.0], "load": 0.3, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 53.62 MiB is free. Including non-PyTorch memory, this process has 21.55 GiB memory in use. Of the allocated memory 21.03 GiB is allocated by PyTorch, and 225.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 486, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.bn3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py\", line 171, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.batch_norm(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 2478, in batch_norm\n", "pipe": "stderr"} +{"event": "line", "data": " return torch.batch_norm(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 53.62 MiB is free. Including non-PyTorch memory, this process has 21.55 GiB memory in use. Of the allocated memory 21.03 GiB is allocated by PyTorch, and 225.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712623491.4809077, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1.data new file mode 100644 index 000000000..9b8f5d5fc --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1.data @@ -0,0 +1,72 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623484.341721, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712623484.36675}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [14430.375, 24512.0], "load": 0.27, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24378.375, 24512.0], "load": 0.33, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 256.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 1035, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 759, in main\n", "pipe": "stderr"} +{"event": "line", "data": " train_metrics = train_one_epoch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py\", line 874, in train_one_epoch\n", "pipe": "stderr"} +{"event": "line", "data": " output = model(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 835, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.forward_features(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 824, in forward_features\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.layer3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/timm/models/resnet.py\", line 485, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.conv3(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 98.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 133.62 MiB is free. Including non-PyTorch memory, this process has 21.47 GiB memory in use. Of the allocated memory 20.92 GiB is allocated by PyTorch, and 256.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/vodofeze.2024-04-09_00:35:28.669482/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712623492.2821853, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D0.data new file mode 100644 index 000000000..77d75d3e9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D0.data @@ -0,0 +1,1313 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623279.001314, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623281.3676748}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.54, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 540.8629889848268, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 577.5857275953136, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 365.62123573657004, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.3842861116495, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.2142988182002, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.6083376233403, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.8158022668257, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.1368020685458, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.7884862807988, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 394.57818703491273, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.1216979028789, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.1127784839421, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.0805193305263, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.3868496026478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.7633167584821, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.1381331892618, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 574.4590234354378, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 427.1282456528531, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.1360076862483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.2594035935296, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.2345194170954, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.4610341909139, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.266503949861, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 568.2406932224715, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 422.64438447903314, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.3128688484638, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 565.9064501128042, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 562.4214510337908, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.7893589944041, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.6305394010564, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 560.072847741851, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 570.6962286778095, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 439.1577346754046, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.9890650209329, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.4166008377824, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 555.4719942019061, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.5194406731885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 552.8064705183521, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 559.3161012865334, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 402.79519938401705, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 552.3973828236761, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 550.838790677005, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.1008878128873, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 559.9272747013699, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 547.7687287979326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.414965490052, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.2849220655446, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.79, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 426.86287235653316, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.1694505672627, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.6267759781723, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.1266568098622, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 555.8718604766264, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 550.2756279987647, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 558.5491733319992, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 412.19744715429607, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 552.2351977530562, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 553.3490277399642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 555.3019226877891, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 554.8291932754344, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 559.1426145201709, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9595947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.0612033055274, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623351.9785657, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D1.data new file mode 100644 index 000000000..01d96ae53 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/resnet50.D1.data @@ -0,0 +1,1305 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623281.350314, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623281.3746731}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.01910400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14129638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10113525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0660400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13128662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06024169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08062744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08367919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15252685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12567138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07647705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1417236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.53, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03192138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.125244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0218505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0223388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.117919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06207275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0548095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03485107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9847412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03143310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95245361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06109619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07000732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03497314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00689697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1002197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 571.562147430314, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09124755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03094482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93170166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.9717672333396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84197998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86297607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 420.49141441352765, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97552490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88336181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9532470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97161865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 562.7444030816779, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97271728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7388916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00238037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.008056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 563.2707177123414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09857177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97723388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.4874482125751, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92535400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01165771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94598388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99298095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0369873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92120361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 542.3923850097916, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92364501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93316650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04547119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.8178578005214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9361572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97625732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9761962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95465087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 560.8759999739893, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00274658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0006103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 551.837568301773, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7237548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8052978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88201904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78741455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 469.3895168593994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00042724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91864013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 564.7536671573436, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87933349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96112060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 559.0853398782426, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9356689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9205322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.4904079139615, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9881591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93280029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07684326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96673583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9493408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.2815062226576, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0050048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0535888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9410400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98626708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 560.7178202843138, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04046630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95721435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89801025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00994873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96990966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 568.0018181501531, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8685302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86651611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 403.9121216525652, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89483642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.8610809580437, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95697021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8853759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.3672931825062, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95355224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93658447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01934814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89849853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00103759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.8522814365483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9840087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8936767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0164794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97174072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02459716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89459228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 553.9684525026213, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96136474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97320556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 564.0927522435003, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9674072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9561767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9078369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 576.4473832928974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98931884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07196044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90740966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 564.4311620857126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 468.69604645968604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85711669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87554931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87847900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.844482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93585205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79632568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 565.6751973056183, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83502197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90557861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79718017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9254150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03643798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79534912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97515869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.517644480054, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9012451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95159912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98297119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9666748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92718505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 555.5728743396013, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0394287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90948486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 552.4945852796945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93634033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.3192349550757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00177001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01214599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99310302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93255615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0482177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 572.4967515431069, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7882080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.8, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 423.66935800897835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78887939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8995361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96575927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91510009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91705322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 553.4692168291241, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7269287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8709716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94378662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06353759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 560.1308303804195, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88275146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94964599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02496337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.85, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96051025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89263916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96856689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.4007297666966, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8436279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96063232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.1966644816506, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.919189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04193115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92437744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92291259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02398681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 551.1792281026767, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92779541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9212646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0501708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 560.0599026848275, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86895751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13372802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94915771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8272705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 568.7964354522785, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85467529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8621826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 442.1576664045869, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8677978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85736083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9222412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8768310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87493896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94610595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86285400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 569.1787838703175, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9405517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8900146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 542.1509950854435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8963623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97369384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96624755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01641845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93377685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8817138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 542.4429091010177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9432373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.87, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02960205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86810302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04705810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 551.5429587105873, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89422607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932861328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86151123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96588134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 545.3638265576355, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86724853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.18231201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9686279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0140380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99749755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 557.8913597234284, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78289794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92413330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 433.9838793500946, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.814208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79412841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76678466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.142052926248, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80401611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.725341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 565.7526995648817, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.863525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92596435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96124267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90216064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8924560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 559.6501151427645, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8880615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88873291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0775146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 560.648659831527, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95538330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9661865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.972412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 556.6488314705962, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96453857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96722412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.88, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9989013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01422119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 562.3699744601726, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9864501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86346435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93499755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81903076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0323486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 566.4862111774687, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88311767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9024658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80145263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 457.02134414465405, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9471435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.77, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0303955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87921142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73345947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80206298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 562.1546465512536, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8074951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88726806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.3746981032132, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9696044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92523193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0208740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03631591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.9854419583572, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97393798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96246337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0494384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81549072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8983154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9752197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 561.4594785112506, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9971923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90887451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 567.6137184620268, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [5818.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712623351.85862, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D0.data new file mode 100644 index 000000000..317f8c4cb --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D0.data @@ -0,0 +1,25 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623902.898479, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623905.3506181}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "ModuleNotFoundError", "message": "No module named 'deepspeed'"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 6, in \n", "pipe": "stderr"} +{"event": "line", "data": " import deepspeed\n", "pipe": "stderr"} +{"event": "line", "data": "ModuleNotFoundError: No module named 'deepspeed'\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623906.9577646, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D1.data new file mode 100644 index 000000000..774db4898 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/rwkv.D1.data @@ -0,0 +1,25 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623905.332813, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623905.3838406}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "ModuleNotFoundError", "message": "No module named 'deepspeed'"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 6, in \n", "pipe": "stderr"} +{"event": "line", "data": " import deepspeed\n", "pipe": "stderr"} +{"event": "line", "data": "ModuleNotFoundError: No module named 'deepspeed'\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712623906.8942654, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D0.data new file mode 100644 index 000000000..ec635c96f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D0.data @@ -0,0 +1,171 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 24460.375, "total": 24512.0}, "utilization": {"compute": 0.97, "memory": 0.9978938887075718}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623657.926485, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623660.3444593}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.107915878295898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [16198.375, 24512.0], "load": 0.67, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20402.375, 24512.0], "load": 0.97, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [20142.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24384.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"} +{"event": "line", "data": " main(config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"} +{"event": "line", "data": " solver.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"} +{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623672.0065103, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D1.data new file mode 100644 index 000000000..002c71be0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/stargan.D1.data @@ -0,0 +1,171 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623660.326328, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623660.3523586}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.121368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [18876.375, 24512.0], "load": 0.69, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [20402.375, 24512.0], "load": 0.93, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [19756.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24384.375, 24512.0], "load": 0.98, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 222, in \n", "pipe": "stderr"} +{"event": "line", "data": " main(config)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py\", line 77, in main\n", "pipe": "stderr"} +{"event": "line", "data": " solver.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/solver.py\", line 282, in train\n", "pipe": "stderr"} +{"event": "line", "data": " x_fake = self.G(x_real, c_trg)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 94, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/model.py\", line 20, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return x + self.main(x)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/container.py\", line 215, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " input = module(input)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 127.62 MiB is free. Including non-PyTorch memory, this process has 21.48 GiB memory in use. Of the allocated memory 20.76 GiB is allocated by PyTorch, and 433.74 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712623672.1867037, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D0.data new file mode 100644 index 000000000..7bfc19e25 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D0.data @@ -0,0 +1,59 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623674.601034, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623677.0263307}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24438.375, 24512.0], "load": 0.09, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"} +{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623681.6581614, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D1.data new file mode 100644 index 000000000..dec5c3bdb --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/super-slomo.D1.data @@ -0,0 +1,59 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623677.008645, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623677.0334249}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:456: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24438.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 274, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py\", line 219, in main\n", "pipe": "stderr"} +{"event": "line", "data": " intrpOut = ArbTimeFlowIntrp(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 209, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = self.up5(x, s1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/model.py\", line 139, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 460, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " return self._conv_forward(input, self.weight, self.bias)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py\", line 456, in _conv_forward\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv2d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 484.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 73.62 MiB is free. Including non-PyTorch memory, this process has 21.53 GiB memory in use. Of the allocated memory 20.51 GiB is allocated by PyTorch, and 765.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712623681.5408883, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D0.data new file mode 100644 index 000000000..654b729d4 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D0.data @@ -0,0 +1,64 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623446.544149, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623448.9517653}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24078.375, 24512.0], "load": 0.03, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"} +{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623452.466748, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D1.data new file mode 100644 index 000000000..67f63f1bc --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/t5.D1.data @@ -0,0 +1,64 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623448.934645, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623448.9603055}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24078.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1746, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " decoder_outputs = self.decoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 1113, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = layer_module(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 694, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " self_attention_outputs = self.layer[0](\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 601, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attention_output = self.SelfAttention(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py\", line 561, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = nn.functional.softmax(scores.float(), dim=-1).type_as(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py\", line 1856, in softmax\n", "pipe": "stderr"} +{"event": "line", "data": " ret = input.softmax(dim)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 512.00 MiB. GPU 0 has a total capacty of 23.73 GiB of which 433.62 MiB is free. Including non-PyTorch memory, this process has 21.18 GiB memory in use. Of the allocated memory 20.75 GiB is allocated by PyTorch, and 157.31 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712623452.54347, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D0.data new file mode 100644 index 000000000..157549472 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D0.data @@ -0,0 +1,183 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623091.8331, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623094.2285924}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 43.8435710265474, "units": "Tflops", "t": 1712623096.0294204}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623095.4086173}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.06, "temperature": null, "power": null}}, "t": 1712623095.9150257}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.471589267725115, "units": "Tflops", "t": 1712623096.524578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.25, "temperature": null, "power": null}}, "t": 1712623096.4205604}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.359358890797566, "units": "Tflops", "t": 1712623097.0095093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.45, "temperature": null, "power": null}}, "t": 1712623096.9260511}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.01994795353335, "units": "Tflops", "t": 1712623097.5091982}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.58, "temperature": null, "power": null}}, "t": 1712623097.431647}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.15483639488094, "units": "Tflops", "t": 1712623097.9967682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.78, "temperature": null, "power": null}}, "t": 1712623097.9371946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.371336189899424, "units": "Tflops", "t": 1712623098.492664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.91, "temperature": null, "power": null}}, "t": 1712623098.442748}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.53841862809422, "units": "Tflops", "t": 1712623098.9865313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623098.9483185}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.534891759203454, "units": "Tflops", "t": 1712623099.4804626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.4538426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.87950966310718, "units": "Tflops", "t": 1712623099.9708643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.9593818}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.63869445605604, "units": "Tflops", "t": 1712623100.4641762}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.775759037463295, "units": "Tflops", "t": 1712623100.95546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.4648595}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.70724087934804, "units": "Tflops", "t": 1712623101.4475007}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.9703112}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.31407498570542, "units": "Tflops", "t": 1712623101.9439554}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.4758046}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.24532709354406, "units": "Tflops", "t": 1712623102.4411163}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.9812956}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.582336173192594, "units": "Tflops", "t": 1712623102.9347699}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.486918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.533859610557016, "units": "Tflops", "t": 1712623103.4287744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.9928212}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.73189365875823, "units": "Tflops", "t": 1712623103.920504}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623103.4982536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.44962155420787, "units": "Tflops", "t": 1712623104.4153507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.003749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.6805156477458, "units": "Tflops", "t": 1712623104.907686}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.5092206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.46580054426478, "units": "Tflops", "t": 1712623105.4026744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.015092}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.17861194251971, "units": "Tflops", "t": 1712623105.9005663}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.5206554}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.49752838022492, "units": "Tflops", "t": 1712623106.3951027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.026228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.552854503360656, "units": "Tflops", "t": 1712623106.8889887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.5317614}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.21919349541802, "units": "Tflops", "t": 1712623107.3866274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.037369}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.9911588931869, "units": "Tflops", "t": 1712623107.875701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.5428312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.75668223287567, "units": "Tflops", "t": 1712623108.3673098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623108.0484407}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.81239397954099, "units": "Tflops", "t": 1712623108.8695068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623108.553901}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.82652855968339, "units": "Tflops", "t": 1712623109.360327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.0593958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.47343339984925, "units": "Tflops", "t": 1712623109.8549066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.564936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.55758961450895, "units": "Tflops", "t": 1712623110.3485708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.0703964}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.75577008241259, "units": "Tflops", "t": 1712623110.8400342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.5758479}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.75409790312376, "units": "Tflops", "t": 1712623111.331513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.0813398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.357444333507466, "units": "Tflops", "t": 1712623111.8273902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.5869372}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.638845684245666, "units": "Tflops", "t": 1712623112.3202424}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.0924084}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.20177411880263, "units": "Tflops", "t": 1712623112.817862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.5978737}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.34991521230061, "units": "Tflops", "t": 1712623113.3139157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.1034243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.49606863616717, "units": "Tflops", "t": 1712623113.8082895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.6089637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.68962982148018, "units": "Tflops", "t": 1712623114.3007636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.114521}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.12784184112692, "units": "Tflops", "t": 1712623114.799414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.620103}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.759983659624204, "units": "Tflops", "t": 1712623115.290931}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.1256626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.89751173474453, "units": "Tflops", "t": 1712623115.7920356}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.6310794}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.89126141086514, "units": "Tflops", "t": 1712623116.2820249}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.136676}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.19150269893531, "units": "Tflops", "t": 1712623116.7797778}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.6422555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.39747901448011, "units": "Tflops", "t": 1712623117.2752066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.1478684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.273807969226915, "units": "Tflops", "t": 1712623117.7721224}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.653514}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.790436206455894, "units": "Tflops", "t": 1712623118.263283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.1591587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.54667884823142, "units": "Tflops", "t": 1712623118.7570596}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.664685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.1924978647532, "units": "Tflops", "t": 1712623119.2547846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.1702466}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.20099035771395, "units": "Tflops", "t": 1712623119.7524133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.675777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.14304793874101, "units": "Tflops", "t": 1712623120.2506862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.1814673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.077061913061236, "units": "Tflops", "t": 1712623120.7497108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.686945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.944570217020996, "units": "Tflops", "t": 1712623121.250237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.1923978}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.92383114296778, "units": "Tflops", "t": 1712623121.7510133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.6981134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.34072589736604, "units": "Tflops", "t": 1712623122.247069}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.2036312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.57496748403367, "units": "Tflops", "t": 1712623122.7405236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.7090547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.232553409048414, "units": "Tflops", "t": 1712623123.23779}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.2145367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.66759764488671, "units": "Tflops", "t": 1712623123.7302427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.7200105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.524378983365345, "units": "Tflops", "t": 1712623124.2242577}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.866320542025534, "units": "Tflops", "t": 1712623124.7256444}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.2255557}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.49568224935959, "units": "Tflops", "t": 1712623125.2203665}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.7312415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.47405089343643, "units": "Tflops", "t": 1712623125.7264655}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.236843}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.878820923091595, "units": "Tflops", "t": 1712623126.2278664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.74243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.26507503524926, "units": "Tflops", "t": 1712623126.725049}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.2478912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.32378575532784, "units": "Tflops", "t": 1712623127.2215304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.7533865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.01744800182675, "units": "Tflops", "t": 1712623127.7214098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.2590888}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.32730056933882, "units": "Tflops", "t": 1712623128.2176178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.764734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.0058343039614, "units": "Tflops", "t": 1712623128.7176442}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.2702048}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.248001589154356, "units": "Tflops", "t": 1712623129.2148857}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.7757044}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.68022438717206, "units": "Tflops", "t": 1712623129.7187438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.2814033}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.13580261595541, "units": "Tflops", "t": 1712623130.2173321}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.7868505}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.625951761861835, "units": "Tflops", "t": 1712623130.710333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.292292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.164650153537075, "units": "Tflops", "t": 1712623131.2083778}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.7978234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.39279923671712, "units": "Tflops", "t": 1712623131.703878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.303362}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.34959541158024, "units": "Tflops", "t": 1712623132.2113159}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.8087897}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.63249498239682, "units": "Tflops", "t": 1712623132.704337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.3143435}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.20084208171567, "units": "Tflops", "t": 1712623133.2019727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.8199859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.34025693935622, "units": "Tflops", "t": 1712623133.6981366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.325479}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.2284173351081, "units": "Tflops", "t": 1712623134.1954625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.830965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.571088618473695, "units": "Tflops", "t": 1712623134.7002895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.3366957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.326405836522156, "units": "Tflops", "t": 1712623135.1965156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.8422031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.089809163095964, "units": "Tflops", "t": 1712623135.6954353}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.3476462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.851909080277544, "units": "Tflops", "t": 1712623136.1970305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.853093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.30592207821016, "units": "Tflops", "t": 1712623136.6934912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.358571}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.84855263232728, "units": "Tflops", "t": 1712623137.1951172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.8640428}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.865173117791414, "units": "Tflops", "t": 1712623137.6966076}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.369588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.414774970468116, "units": "Tflops", "t": 1712623138.1918411}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.8751435}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.03175828649601, "units": "Tflops", "t": 1712623138.6913884}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.3808029}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.832548818853724, "units": "Tflops", "t": 1712623139.193241}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.8862462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.24095520180687, "units": "Tflops", "t": 1712623139.690435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.391826}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.665128705171554, "units": "Tflops", "t": 1712623140.1941767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.8972795}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623140.805768, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D1.data new file mode 100644 index 000000000..7c7100da1 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/tf32.D1.data @@ -0,0 +1,183 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623094.2193, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623094.2290885}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 43.73046674857098, "units": "Tflops", "t": 1712623096.0125487}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2393.125, 24512.0], "load": 0, "temperature": null, "power": null}}, "t": 1712623095.3517134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.1, "temperature": null, "power": null}}, "t": 1712623095.8576996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.542892533702954, "units": "Tflops", "t": 1712623096.50686}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.23, "temperature": null, "power": null}}, "t": 1712623096.363203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.28519753160594, "units": "Tflops", "t": 1712623096.9925706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.43, "temperature": null, "power": null}}, "t": 1712623096.8689373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.0508741849975, "units": "Tflops", "t": 1712623097.4919267}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.56, "temperature": null, "power": null}}, "t": 1712623097.3747633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.87645259628623, "units": "Tflops", "t": 1712623097.9831727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.76, "temperature": null, "power": null}}, "t": 1712623097.8802729}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.41024122919766, "units": "Tflops", "t": 1712623098.4784653}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.89, "temperature": null, "power": null}}, "t": 1712623098.385769}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.833348662100924, "units": "Tflops", "t": 1712623098.9690645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623098.891298}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.65927090061946, "units": "Tflops", "t": 1712623099.4615808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.3968325}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.510885191135124, "units": "Tflops", "t": 1712623099.9558082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623099.9023397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.596433961233544, "units": "Tflops", "t": 1712623100.4497175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.4081955}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.69974419408869, "units": "Tflops", "t": 1712623100.941962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623100.913711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.54237626795423, "units": "Tflops", "t": 1712623101.4357605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.4192305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.65254688790535, "units": "Tflops", "t": 1712623101.9284685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623101.9250765}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.461234947901396, "units": "Tflops", "t": 1712623102.4233258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.595248024068646, "units": "Tflops", "t": 1712623102.9165077}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.4305787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.697231417231194, "units": "Tflops", "t": 1712623103.4087205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623102.936197}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.46697960644611, "units": "Tflops", "t": 1712623103.9033675}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623103.4417894}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.52291747713375, "units": "Tflops", "t": 1712623104.3975277}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623103.9472976}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.446794118662204, "units": "Tflops", "t": 1712623104.8923955}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.4529493}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.38053819808125, "units": "Tflops", "t": 1712623105.388112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623104.9585474}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.82174035492504, "units": "Tflops", "t": 1712623105.8900435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.4642127}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.53067745338433, "units": "Tflops", "t": 1712623106.3841202}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623105.969843}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.53910686276721, "units": "Tflops", "t": 1712623106.8779542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.4753597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.50444199844907, "units": "Tflops", "t": 1712623107.3721845}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623106.980957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.441654248214554, "units": "Tflops", "t": 1712623107.867226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.486554}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.4004497962744, "units": "Tflops", "t": 1712623108.3626034}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623107.9920566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.28279947828187, "units": "Tflops", "t": 1712623108.859308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623108.4976153}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.54035434232721, "units": "Tflops", "t": 1712623109.3532798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.0032525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.44159000735656, "units": "Tflops", "t": 1712623109.8482015}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623109.5090666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.95271633725987, "units": "Tflops", "t": 1712623110.3486235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.0146992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.51894180603004, "units": "Tflops", "t": 1712623110.8428059}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623110.5202694}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.91517298659113, "units": "Tflops", "t": 1712623111.3436496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.0257545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.66792212633967, "units": "Tflops", "t": 1712623111.8360648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623111.5313456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.879706203079785, "units": "Tflops", "t": 1712623112.3261516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.0371003}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.77410709941594, "units": "Tflops", "t": 1712623112.8175516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623112.5427928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.83735889224532, "units": "Tflops", "t": 1712623113.3081036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.0483916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.0926124397762, "units": "Tflops", "t": 1712623113.8069518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623113.5538704}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.91743004965787, "units": "Tflops", "t": 1712623114.2966278}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.0594296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.59235891390476, "units": "Tflops", "t": 1712623114.7898748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623114.5650709}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.98973241800559, "units": "Tflops", "t": 1712623115.278886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.0705957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.20863851156087, "units": "Tflops", "t": 1712623115.7764208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623115.5762696}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.89099922299202, "units": "Tflops", "t": 1712623116.2663915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.0819058}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.61321634072605, "units": "Tflops", "t": 1712623116.7594018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623116.587501}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.61876291018303, "units": "Tflops", "t": 1712623117.2523575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.0932891}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.670647956676476, "units": "Tflops", "t": 1712623117.744905}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623117.5989318}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.81653094710899, "units": "Tflops", "t": 1712623118.2356827}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.1045806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.74419768761328, "units": "Tflops", "t": 1712623118.7272546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623118.6100686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.70992816478444, "units": "Tflops", "t": 1712623119.2191994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.115859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.65886005076648, "units": "Tflops", "t": 1712623119.7117229}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623119.6215947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.56874262482913, "units": "Tflops", "t": 1712623120.2053523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.127231}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.67159991231088, "units": "Tflops", "t": 1712623120.6977284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623120.6329694}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.64212974907796, "units": "Tflops", "t": 1712623121.1904216}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.1384916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.63096158037835, "units": "Tflops", "t": 1712623121.6832457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623121.644007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.58882349742874, "units": "Tflops", "t": 1712623122.1765378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.1497154}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.66513173989785, "units": "Tflops", "t": 1712623122.6689835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623122.655322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.51556841503254, "units": "Tflops", "t": 1712623123.1632376}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.160865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.63087519466743, "units": "Tflops", "t": 1712623123.6560695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.6349788852825, "units": "Tflops", "t": 1712623124.1487935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623123.6664689}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.715932380751134, "units": "Tflops", "t": 1712623124.640681}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.1722424}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.57757320484118, "units": "Tflops", "t": 1712623125.1454055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623124.677736}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.735950853333186, "units": "Tflops", "t": 1712623125.637069}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.1835349}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.62856450101188, "units": "Tflops", "t": 1712623126.1299117}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623125.6890228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.56944204668812, "units": "Tflops", "t": 1712623126.6348674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.1946452}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.47971746345521, "units": "Tflops", "t": 1712623127.1293674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623126.7001681}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.611015360771056, "units": "Tflops", "t": 1712623127.622403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.2061338}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.5445919929932, "units": "Tflops", "t": 1712623128.1161737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623127.7116513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.380089751922156, "units": "Tflops", "t": 1712623128.6117697}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.217299}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.554705381539044, "units": "Tflops", "t": 1712623129.105427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623128.7228985}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.57546296275335, "units": "Tflops", "t": 1712623129.598863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.2287858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.81953350139618, "units": "Tflops", "t": 1712623130.1008215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623129.7343214}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.60150185790077, "units": "Tflops", "t": 1712623130.5941255}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.239892}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.55644879115871, "units": "Tflops", "t": 1712623131.0877757}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623130.7454426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.7192928443037, "units": "Tflops", "t": 1712623131.5796204}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.2509398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.442382324251874, "units": "Tflops", "t": 1712623132.074534}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623131.7566164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.51668565988643, "units": "Tflops", "t": 1712623132.5686152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.2623246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.52844161498417, "units": "Tflops", "t": 1712623133.062567}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623132.7678783}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.54938990062055, "units": "Tflops", "t": 1712623133.556289}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.273382}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.478559174905016, "units": "Tflops", "t": 1712623134.0507982}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623133.7794974}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.50323947574267, "units": "Tflops", "t": 1712623134.545032}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.2851014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.50238057073835, "units": "Tflops", "t": 1712623135.0394018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623134.7907135}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.60439215295539, "units": "Tflops", "t": 1712623135.5325139}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.2964766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.904156550370054, "units": "Tflops", "t": 1712623136.033496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623135.8020573}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.678524446759354, "units": "Tflops", "t": 1712623136.5257902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.307555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.33642038275346, "units": "Tflops", "t": 1712623137.0218756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623136.8131287}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.643923225301506, "units": "Tflops", "t": 1712623137.51455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.3187313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.2704291262752, "units": "Tflops", "t": 1712623138.0113883}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623137.8242586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.68737799333411, "units": "Tflops", "t": 1712623138.5035825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.3299472}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.31498693912936, "units": "Tflops", "t": 1712623139.011369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623138.8354478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.78143306873124, "units": "Tflops", "t": 1712623139.5025263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.3411546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.228332500661864, "units": "Tflops", "t": 1712623139.9998345}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [3456.375, 24512.0], "load": 0.99, "temperature": null, "power": null}}, "t": 1712623139.8466687}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712623140.745916, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D0.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D0.data new file mode 100644 index 000000000..9e1d236e9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D0.data @@ -0,0 +1,61 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623464.110328, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623466.5244505}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv1d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23104.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623479.6472666, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D1.data b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D1.data new file mode 100644 index 000000000..5f5a74183 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A10-24Q/vodofeze.2024-04-09_00:35:28.669482/whisper.D1.data @@ -0,0 +1,61 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "delicatemastodon.eastus2.cloudapp.azure.com", "ip": "delicatemastodon.eastus2.cloudapp.azure.com", "ipaddrlist": ["00:00:00:00:00:00", "fe80::6245:bdff:feb3:312b%eth0", "60:45:bd:b3:31:2b", "10.0.1.4", "127.0.0.1", "::1"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a10_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.delicatemastodon.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "vodofeze.2024-04-09_00:35:28.669482", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f8568b3c26182a7cfabab1ca058f33bf", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 72, "brand": "AMD EPYC 74F3 24-Core Processor"}, "os": {"sysname": "Linux", "nodename": "delicatemastodon", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5701be00-f607-11ee-a28f-e97901091b3e": {"device": "0", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-5701be00-f607-11ee-a28f-f106fd37d291": {"device": "1", "product": "NVIDIA A10-24Q", "memory": {"used": 2390.25, "total": 24512.0}, "utilization": {"compute": 0, "memory": 0.09751346279373369}, "temperature": null, "power": null, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712623466.506795, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623466.5323417}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2672.375, 24512.0], "load": 0, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/conv.py:306: UserWarning: Applied workaround for CuDNN issue, install nvrtc.so (Triggered internally at ../aten/src/ATen/native/cudnn/Conv_v8.cpp:80.)\n", "pipe": "stderr"} +{"event": "line", "data": " return F.conv1d(input, weight, bias, self.stride,\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23104.375, 24512.0], "load": 0.05, "temperature": null, "power": null}}}, "pipe": "data"} +{"event": "error", "data": {"type": "OutOfMemoryError", "message": "CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 156, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 152, in main\n", "pipe": "stderr"} +{"event": "line", "data": " runner.train()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 70, in train\n", "pipe": "stderr"} +{"event": "line", "data": " self.step(data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface/bench/__main__.py\", line 55, in step\n", "pipe": "stderr"} +{"event": "line", "data": " outputs = self.model(**data)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 2393, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " encoder_outputs = self.encoder(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 1159, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " layer_outputs = encoder_layer(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 722, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " hidden_states, attn_weights, _ = self.self_attn(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1518, in _wrapped_call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return self._call_impl(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1527, in _call_impl\n", "pipe": "stderr"} +{"event": "line", "data": " return forward_call(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py\", line 413, in forward\n", "pipe": "stderr"} +{"event": "line", "data": " attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 1.61 GiB. GPU 0 has a total capacty of 23.73 GiB of which 1.37 GiB is free. Including non-PyTorch memory, this process has 20.23 GiB memory in use. Of the allocated memory 19.85 GiB is allocated by PyTorch, and 92.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712623479.3414428, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/README.md b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/README.md new file mode 100644 index 000000000..8ad190f15 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/README.md @@ -0,0 +1,38 @@ +``` +================= +Benchmark results +================= + fail n perf sem% std% peak_memory score weight +bert-fp16 0 3 150.27 0.1% 0.7% 24616 300.011282 0.00 +bert-fp32 0 3 27.25 0.2% 2.2% 31580 54.455513 0.00 +bert-tf32 0 3 115.27 0.1% 1.2% 31582 230.122251 0.00 +bert-tf32-fp16 0 3 149.22 0.1% 0.8% 24616 297.987446 3.00 +bf16 0 3 268.52 0.2% 2.9% 1804 536.612601 0.00 +convnext_large-fp16 0 3 306.17 1.2% 11.5% 27478 614.515028 0.00 +convnext_large-fp32 0 3 42.77 1.3% 12.8% 49598 85.028380 0.00 +convnext_large-tf32 0 3 124.43 2.3% 21.7% 49598 249.263266 0.00 +convnext_large-tf32-fp16 0 3 308.44 1.2% 11.3% 27478 616.772433 3.00 +davit_large 0 3 291.78 0.4% 5.9% 34016 583.029676 1.00 +davit_large-multi 0 2 431.63 3.0% 32.8% 37565 431.631903 5.00 +dlrm 0 2 461813.93 1.6% 17.0% 7120 461813.929762 1.00 +focalnet 0 3 378.80 0.5% 6.2% 26078 760.320817 2.00 +fp16 0 3 252.99 0.1% 1.3% 1804 506.831795 0.00 +fp32 0 3 18.95 0.1% 1.3% 2182 37.869296 0.00 +llama 0 3 471.88 7.1% 77.6% 28442 921.048609 1.00 +reformer 0 3 55.41 0.2% 2.6% 25420 110.828259 1.00 +regnet_y_128gf 0 3 78.30 0.6% 7.9% 31570 156.621543 2.00 +resnet152 0 3 633.17 0.5% 6.7% 35443 1265.788254 1.00 +resnet152-multi 0 2 946.21 3.1% 34.0% 43101 946.214334 5.00 +resnet50 0 3 992.36 1.2% 16.3% 4746 1983.133170 1.00 +rwkv 3 3 NaN NaN NaN 1574 NaN 1.00 +stargan 0 3 38.04 2.0% 27.0% 37442 75.945645 1.00 +super-slomo 0 3 41.08 0.6% 8.5% 33816 82.433797 1.00 +t5 0 3 46.30 0.3% 4.3% 35460 92.612285 2.00 +tf32 0 3 132.81 0.1% 1.3% 2182 265.201409 0.00 +whisper 0 3 214.88 0.1% 1.4% 36740 428.640005 1.00 + +Scores +------ +Failure rate: 3.85% (FAIL) +Score: 427.84 +``` diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/badge.svg b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/badge.svg new file mode 100644 index 000000000..544903e36 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/badge.svg @@ -0,0 +1 @@ +NVIDIA_A100_80GB_PCIeNVIDIA_A100_80GB_PCIepartialpartial \ No newline at end of file diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-fp16.D0.data new file mode 100644 index 000000000..bab0fb4f4 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-fp16.D0.data @@ -0,0 +1,445 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 72, "power": 96.044, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675798.080075, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675798.0966258}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 277.215}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 341.09}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.159934424546, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 313.571}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.9051163140101, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.42951540836472, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.27782082445947, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 285.639}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.64572566514454, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.6308405336557, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.75403492804443, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 253.474}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.73718842802583, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.69144613594364, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.71867234433844, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 293.956}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.5581793989585, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.56294608645698, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.4616669008864, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 318.439}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.26495626937205, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.53880089576367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.52630950506952, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 287.777}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.7298807631396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.57615977115518, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.6465012235816, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.99, "temperature": 75, "power": 330.288}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.494419446466, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.40245360772383, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.40515180121346, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 261.276}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.45384354428913, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.31365918019296, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.32432623768238, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 254.214}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.17184497932362, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.41387427322832, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 262.015}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.263337306788, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.03264426558414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.40142710739167, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 261.715}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.29105436296922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 273.535}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675839.8117232, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-fp32.D0.data new file mode 100644 index 000000000..0438ca7b6 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-fp32.D0.data @@ -0,0 +1,228 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 94.715, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675733.547581, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675733.5635371}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479292869567871}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323901176452637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 0.81, "temperature": 72, "power": 287.678}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044936180114746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.83090591430664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708606719970703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 73, "power": 284.788}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565232276916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491073608398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468774795532227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.4805908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 21.73469360420398, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.441}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515676498413086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.562061309814453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.61355493287104, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608406066894531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640921592712402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.61151246687917, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 75, "power": 306.312}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.648000717163086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.63291072845459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.620305961962718, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614981651306152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622065544128418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.60435474896471, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 75, "power": 302.577}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669747352600098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752553939819336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.496411013767172, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848938941955566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.93950080871582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.482477684304342, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 75, "power": 303.493}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007094383239746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.038527488708496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.517362192539565, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.016993522644043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.91185188293457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.52376980606766, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 75, "power": 289.363}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.875959396362305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.918696403503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.47355349700712, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.017876625061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.246811866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.49921239140566, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 76, "power": 291.394}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.431685447692871}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.560530662536621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.49655615300942, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.64004135131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.689648628234863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.464795783545497, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 76, "power": 292.951}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.736746788024902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.803998947143555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.49086028657226, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.90011978149414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.020580291748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.466106279820153, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.686}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.152454376220703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.279325485229492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.5157341320032, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.385612487792969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.46435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.44772913968458, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 76, "power": 289.946}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.513260841369629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.550541877746582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.4802375571043, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.602174758911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.679553985595703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.436120579054343, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.933}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.78598690032959}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.926142692565918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.484640594033163, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.089705467224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.263202667236328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.40788308136935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.213}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.435498237609863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.600459098815918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.484803007168644, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.752622604370117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.883284568786621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.47102674269471, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.845}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.991811752319336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.081356048583984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.400340032311608, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.160579681396484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.241079330444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.446631123645318, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 299.884}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.328137397766113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.415624618530273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.427212974385505, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.503317832946777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.589831352233887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.44768877517212, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 305.524}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.672152519226074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.75645637512207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.39191853325682, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.840280532836914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.907486915588379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.41227345747125, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.985}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.961742401123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.999088287353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.430991045754745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.025399208068848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.040262222290039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.401945326266876, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.785}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.785}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675795.4041297, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-tf32-fp16.D0.data new file mode 100644 index 000000000..7a4835e1c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-tf32-fp16.D0.data @@ -0,0 +1,447 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.04, "memory": 0.010771942138671876}, "temperature": 71, "power": 95.347, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675887.373118, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675887.389691}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 315.336}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 296.93}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.11708381952508, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 344.152}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 142.8378900950317, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 139.8140552409771, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.749738657926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 268.877}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.86261143284108, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.82882778715876, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.65423092196338, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 332.55}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.7673811405021, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.5319819529126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.68087857823835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 254.747}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.6624826163509, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.73446773116743, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.36918878181567, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 264.429}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.72115816308317, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.58198256330473, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 353.978}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.74191032279717, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.6741240827949, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.761785734757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 264.675}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.6662790086085, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.7249839226605, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.07582937709373, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 330.422}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.52723018495902, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.41904904430396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.36561691085257, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 251.976}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.51368646726516, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.67089469412193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.506517408651, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 288.163}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.44161256224476, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.26207791274314, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.69205878966997, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 346.297}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358502388000488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.84582459323198, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 254.546}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675929.4261065, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-tf32.D0.data new file mode 100644 index 000000000..6b16a70e9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bert-tf32.D0.data @@ -0,0 +1,373 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 94.973, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675842.447617, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675842.4642339}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479286193847656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323932647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044816970825195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.830974578857422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 72, "power": 266.094}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708649635314941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565240859985352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491065979003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468748092651367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.480533599853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515584945678711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.561928749084473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608244895935059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640739440917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.647823333740234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.632768630981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 73, "power": 259.322}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614849090576172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.621868133544922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669413566589355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752106666564941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848526000976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.939350128173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007367134094238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.03938102722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.01879596710205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 121.11319421179304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.915424346923828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.861414909362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.006791114807129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 73, "power": 270.025}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.027995109558105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.60317808848755, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.285082817077637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.481186866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.615607261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.69710636138916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.72290202825832, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.744179725646973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.784246444702148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.841986656188965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.928215026855469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.6537322124286, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.041173934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.170112609863281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 73, "power": 326.561}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.299582481384277}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.414661407470703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.85711280423678, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.506692886352539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.572625160217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.624845504760742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.687198638916016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.77033033325691, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.77263069152832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.880777359008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.014769554138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.169602394104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.76112370956545, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.335892677307129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.508112907409668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 73, "power": 306.854}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.685002326965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.864999771118164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.41783987487571, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.042545318603516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.21193790435791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.368173599243164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.505072593688965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.48627883641265, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.626041412353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.737335205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.839637756347656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.941758155822754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.06088889720051, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.044066429138184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 74, "power": 312.885}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.145570755004883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.23983097076416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.333688735961914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.93277694084024, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.425797462463379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.513972282409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.597168922424316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.675834655761719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.86816607327395, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.757519721984863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.833535194396973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.900604248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.956380844116211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.70995663159188, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 74, "power": 257.078}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.990116119384766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.019303321838379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.04137897491455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.049532890319824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.99396412720824, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.041813850402832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.022936820983887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.001933097839355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.978167533874512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.30745098315161, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.95536994934082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.924501419067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.874979019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.806092262268066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 74, "power": 296.062}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.11210682332694, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.719812393188477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.609824180603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.481300354003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.328900337219238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.26068847521537, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.169635772705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.010271072387695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.85876178741455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.748523712158203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.1207568163431, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.666166305541992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.625612258911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.650134086608887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 75, "power": 327.444}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.727069854736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.26042099602385, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.84864330291748}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.988987922668457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.1289644241333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.244298934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.5897207303944, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.312968254089355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.326717376708984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.288104057312012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.236703872680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.9880819210059, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.258355140686035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.536263465881348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.127537727355957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 75, "power": 309.51}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.74303913116455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.47766984480941, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.15192413330078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.16707420349121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.868474960327148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.518776893615723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.81579082256673, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.283734321594238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.241960525512695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.301491737365723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.411982536315918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.4211046785146, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.539347648620605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.672064781188965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 75, "power": 278.963}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.800228118896484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.920104026794434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.75900538925178, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.03019905090332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.13632583618164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.23299217224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.31147003173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.3102205315294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.37289810180664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.41785430908203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.443866729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45807647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.79569707190304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.459491729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45636749267578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 75, "power": 272.639}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.44569206237793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.440540313720703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.86082087111531, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.448402404785156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.4744815826416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.53107261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.61451530456543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.09696004732972, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.71721076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.82810401916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.940759658813477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.044357299804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.85847580559265, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.134565353393555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 76, "power": 299.114}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.20937156677246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.272945404052734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.317352294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.6904240139362, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 76, "power": 289.535}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712675884.776947, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bf16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bf16.D0.data new file mode 100644 index 000000000..3ab0b79f0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/bf16.D0.data @@ -0,0 +1,110 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 91.156, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675171.232775, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712675171.2431242}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 186.92475354722737, "units": "Tflops", "t": 1712675173.0318244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 64, "power": 56.423}}, "t": 1712675172.5536447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.6863740140912, "units": "Tflops", "t": 1712675173.113385}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0, "temperature": 69, "power": 352.981}}, "t": 1712675173.0651615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.0679165633031, "units": "Tflops", "t": 1712675173.1937354}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.2194748581768, "units": "Tflops", "t": 1712675173.2739897}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.2610604508124, "units": "Tflops", "t": 1712675173.354221}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.22925856890316, "units": "Tflops", "t": 1712675173.434458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1681218280737, "units": "Tflops", "t": 1712675173.5147145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.2643225993481, "units": "Tflops", "t": 1712675173.5949414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 306.904}}, "t": 1712675173.5732803}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.20072469713017, "units": "Tflops", "t": 1712675173.6752446}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.137156282929, "units": "Tflops", "t": 1712675173.7555118}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.2114299140057, "units": "Tflops", "t": 1712675173.837246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 262.00305189725896, "units": "Tflops", "t": 1712675173.9212258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.36271615575504, "units": "Tflops", "t": 1712675174.0035388}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.8344149148892, "units": "Tflops", "t": 1712675174.0850914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 282.515}}, "t": 1712675174.082773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.66060362058977, "units": "Tflops", "t": 1712675174.165859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.03453108366807, "units": "Tflops", "t": 1712675174.2461686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1567126554877, "units": "Tflops", "t": 1712675174.326448}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1721967627041, "units": "Tflops", "t": 1712675174.4067094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.21865958046135, "units": "Tflops", "t": 1712675174.486958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1779018746905, "units": "Tflops", "t": 1712675174.567218}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.18523736755856, "units": "Tflops", "t": 1712675174.6474786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 306.0}}, "t": 1712675174.5906804}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.661331040446, "units": "Tflops", "t": 1712675174.727951}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.86453436880896, "units": "Tflops", "t": 1712675174.8104117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.5937907181462, "units": "Tflops", "t": 1712675174.892946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.37951740933215, "units": "Tflops", "t": 1712675174.9749293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.44903768714056, "units": "Tflops", "t": 1712675175.0568912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.5104248042359, "units": "Tflops", "t": 1712675175.1379292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 288.861}}, "t": 1712675175.0988371}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.8606474921907, "units": "Tflops", "t": 1712675175.2186394}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1632320664046, "units": "Tflops", "t": 1712675175.2989209}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1795319502964, "units": "Tflops", "t": 1712675175.3791733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.25779837987693, "units": "Tflops", "t": 1712675175.4594011}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.22599725442484, "units": "Tflops", "t": 1712675175.5396461}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.21050706992713, "units": "Tflops", "t": 1712675175.6198857}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 307.57}}, "t": 1712675175.6085417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.23415068607136, "units": "Tflops", "t": 1712675175.7001708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.7047818718027, "units": "Tflops", "t": 1712675175.7811558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.5545585713804, "units": "Tflops", "t": 1712675175.8630855}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.32408322728736, "units": "Tflops", "t": 1712675175.9450848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3201802747611, "units": "Tflops", "t": 1712675176.0270858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3514070745196, "units": "Tflops", "t": 1712675176.1090786}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.5009054156533, "units": "Tflops", "t": 1712675176.1907287}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 288.283}}, "t": 1712675176.1156206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.5710079454852, "units": "Tflops", "t": 1712675176.2724426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.0904751956837, "units": "Tflops", "t": 1712675176.35422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.0453765316801, "units": "Tflops", "t": 1712675176.4350998}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.49124413077374, "units": "Tflops", "t": 1712675176.516145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.71411892580596, "units": "Tflops", "t": 1712675176.5965326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.20969184553473, "units": "Tflops", "t": 1712675176.6767728}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 295.207}}, "t": 1712675176.6226022}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.18034699536787, "units": "Tflops", "t": 1712675176.7570727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1558977509237, "units": "Tflops", "t": 1712675176.8373306}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.2721422645082, "units": "Tflops", "t": 1712675176.9178464}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.45768447237486, "units": "Tflops", "t": 1712675176.9989016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.46088027285606, "units": "Tflops", "t": 1712675177.0799563}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.2605811951809, "units": "Tflops", "t": 1712675177.161369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 295.215}}, "t": 1712675177.1309574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.7830803418526, "units": "Tflops", "t": 1712675177.2432873}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.1552982295415, "units": "Tflops", "t": 1712675177.3247378}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.8162596091954, "units": "Tflops", "t": 1712675177.406287}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4640761485848, "units": "Tflops", "t": 1712675177.4873483}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.42093817439854, "units": "Tflops", "t": 1712675177.5684392}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.2543309747281, "units": "Tflops", "t": 1712675177.6489658}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 296.374}}, "t": 1712675177.6379707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1942035030152, "units": "Tflops", "t": 1712675177.7292898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.23088925523217, "units": "Tflops", "t": 1712675177.8095384}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.28552845652666, "units": "Tflops", "t": 1712675177.8897753}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.4801707800889, "units": "Tflops", "t": 1712675177.970546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.5144211190069, "units": "Tflops", "t": 1712675178.0515957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.5016333258205, "units": "Tflops", "t": 1712675178.1326466}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.48485117516367, "units": "Tflops", "t": 1712675178.213702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 296.849}}, "t": 1712675178.1460278}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.47446326441354, "units": "Tflops", "t": 1712675178.2948143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.492043271395, "units": "Tflops", "t": 1712675178.3758595}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4936415667514, "units": "Tflops", "t": 1712675178.4569042}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4952398809265, "units": "Tflops", "t": 1712675178.5379643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.492043271395, "units": "Tflops", "t": 1712675178.619009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.0991482119348, "units": "Tflops", "t": 1712675178.6998744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 294.421}}, "t": 1712675178.653044}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.20969184553473, "units": "Tflops", "t": 1712675178.780167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.55359123028205, "units": "Tflops", "t": 1712675178.8611991}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.74526379949543, "units": "Tflops", "t": 1712675178.9418724}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.90262360427505, "units": "Tflops", "t": 1712675179.0227964}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.450517139925, "units": "Tflops", "t": 1712675179.1044638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.94381652732807, "units": "Tflops", "t": 1712675179.1850884}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 296.65}}, "t": 1712675179.1610317}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.46807109906655, "units": "Tflops", "t": 1712675179.2662127}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4816548102612, "units": "Tflops", "t": 1712675179.3472867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.5048301611883, "units": "Tflops", "t": 1712675179.4283285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.45768447237486, "units": "Tflops", "t": 1712675179.5093827}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.55359123028205, "units": "Tflops", "t": 1712675179.5904162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.2260372788168, "units": "Tflops", "t": 1712675179.6712599}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 296.664}}, "t": 1712675179.6694937}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.21458326459987, "units": "Tflops", "t": 1712675179.751568}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4800566560344, "units": "Tflops", "t": 1712675179.8326168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4736642272827, "units": "Tflops", "t": 1712675179.9136744}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4768604040282, "units": "Tflops", "t": 1712675179.994729}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4672720995637, "units": "Tflops", "t": 1712675180.0757892}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.45368982758623, "units": "Tflops", "t": 1712675180.1568558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.50562938179326, "units": "Tflops", "t": 1712675180.2378972}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 297.069}}, "t": 1712675180.180792}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712675181.1504602, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-fp16.D0.data new file mode 100644 index 000000000..bda00a6bf --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-fp16.D0.data @@ -0,0 +1,303 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.05, "memory": 0.010771942138671876}, "temperature": 67, "power": 91.041, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675471.190745, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675471.2069}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 63, "power": 53.61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 62, "power": 53.003}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.76, "temperature": 66, "power": 329.17}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 67, "power": 297.817}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 348.5513281868774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 68, "power": 293.387}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.099983760416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.2919727112792, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.91, "temperature": 68, "power": 308.335}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.0766135382165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.4836254119707, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 347.8743696502833, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0, "temperature": 64, "power": 82.753}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 225.80661487882512, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.9911323233139, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 69, "power": 295.799}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.66245395262223, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.2578796511421, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.47696774053725, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.95, "temperature": 68, "power": 321.913}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.4844699635545, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.1640144947439, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 68, "power": 260.555}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.3758686506261, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 321.3001092977626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.5115982984763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.88, "temperature": 66, "power": 84.29}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 347.2120109076165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 189.2846549808665, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.6234474700737, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 70, "power": 295.209}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 300.20357109049894, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.22177660396807, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.7915666917031, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 71, "power": 309.872}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.15694645791615, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.7402040458745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 71, "power": 234.281}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.5256867510875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.31009384835227, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.2965917893974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 70, "power": 341.449}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 345.59525611759784, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 123.70144660115051, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.991695125721, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.843994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 71, "power": 349.183}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.841278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8758544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.94416435100027, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 72, "power": 303.949}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675522.9576628, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-fp32.D0.data new file mode 100644 index 000000000..3f2c0b519 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-fp32.D0.data @@ -0,0 +1,170 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 89.23, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675385.916222, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675385.932033}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 62, "power": 53.24}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 61, "power": 52.468}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214483737945557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [44852.375, 81920.0], "load": 1.0, "temperature": 67, "power": 305.106}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33617639541626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254825592041016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 68, "power": 278.0}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179396152496338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 68, "power": 295.094}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268494129180908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 64.15711371420358, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018351554870605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 68, "power": 264.918}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.119223103343124, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164165019989014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 69, "power": 283.199}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.34100591470806, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133245468139648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.31745669461583, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162621021270752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 69, "power": 309.693}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.18354802462252, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066978931427002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 302.869}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.211938739513656, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074810981750488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.31605547971682, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0772786140441895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 69, "power": 325.173}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.28743390383642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043660640716553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.331664597067174, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142033576965332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 264.005}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.32624875325141, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11406946182251}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 304.943}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.29865743393779, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.118796348571777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.2958688368918, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084875583648682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 265.096}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.258447821017896, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017924785614014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 294.035}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.271019016937224, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0746049880981445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.282147070832515, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962323188781738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 293.436}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.21655005273149, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078864097595215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 287.418}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.21832538869766, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047895908355713}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.22349391322374, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076254367828369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 283.431}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.21049787243987, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083705902099609}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 288.39}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.2070469649956, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153024196624756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.17117983319621, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962716102600098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 283.007}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.19712447952373, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094931125640869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 285.438}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.238550767445716, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002690315246582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.22881317734754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089021682739258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 275.683}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.207368697358866, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033406734466553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 315.195}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.20686893360487, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93844747543335}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.197471768412825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112069606781006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 267.276}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.15060788060614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875514507293701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.85, "temperature": 71, "power": 290.712}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 54.94509560668991, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840810298919678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 64.83451732769717, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938738822937012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 296.862}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 31.970575616973672, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 323.894}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675468.5829933, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-tf32-fp16.D0.data new file mode 100644 index 000000000..b09ef71b4 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-tf32-fp16.D0.data @@ -0,0 +1,301 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 90.85, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675583.93246, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675583.9489958}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 62, "power": 53.199}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 60, "power": 52.404}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [22582.375, 81920.0], "load": 0.06, "temperature": 62, "power": 117.708}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 64, "power": 179.897}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 66, "power": 265.08}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 326.2837095330548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 348.06948621012646, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.963673369061, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.91, "temperature": 67, "power": 299.194}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.0849609513977, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.00498782076335, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 348.79415914242344, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.44, "temperature": 63, "power": 81.48}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 226.20610603938562, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.8685545978821, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 67, "power": 251.339}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.1879252810222, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.23489925823844, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 67, "power": 323.065}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.8934520192511, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.4705272736109, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.31609380481177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.92, "temperature": 69, "power": 250.766}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.95846650003193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.1542333028915, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.703704079935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 69, "power": 315.965}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 347.91373001770063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 197.3332471315932, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 329.0505648523985, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 69, "power": 282.022}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 332.2672713871013, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 334.72490680604835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.0386168899093, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 70, "power": 313.897}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.59753424618856, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.2537823388754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 69, "power": 300.562}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.783630593949, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.42497978854306, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.5205767672726, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.91, "temperature": 69, "power": 339.754}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.6993319881903, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.7540960004665, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 222.79726027485484, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 70, "power": 243.912}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.843994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.841278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.2957127022304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 70, "power": 298.899}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675635.7054276, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-tf32.D0.data new file mode 100644 index 000000000..10725034f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/convnext_large-tf32.D0.data @@ -0,0 +1,200 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 89.191, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675525.625064, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675525.6411371}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 62, "power": 53.407}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 61, "power": 52.502}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2144575119018555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [47234.375, 81920.0], "load": 0.76, "temperature": 66, "power": 289.626}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.336220741271973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2547926902771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179419040679932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268465518951416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 67, "power": 291.974}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018303871154785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1641645431518555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133294105529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162642955780029}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 67, "power": 260.86}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.43533726870261, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066996097564697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074804782867432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.587014695681, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077259063720703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.38931570865843, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0435991287231445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 68, "power": 265.669}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142058372497559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.33242752088404, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.114046573638916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.0434625091416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1187896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084855556488037}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 69, "power": 312.11}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.11675003788588, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017906188964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.0962807288631, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074584484100342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962344169616699}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.97079235132642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 69, "power": 163.219}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078824996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.97579286979268, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047876834869385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762128829956055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.8121238292379, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083765983581543}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 267.514}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.93646266396927, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153017044067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962660312652588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.47755709135427, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094902038574219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.40245408085529, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002725601196289}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 279.396}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089033126831055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.7529454793587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033377170562744}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.33631575150733, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938430309295654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112060070037842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 292.089}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.39342478604044, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875522613525391}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 144.9422802358933, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840785503387451}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 95.94617903884125, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938703536987305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 339.213}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9308247566223145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.92655679161845, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798532009124756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.62729534947238, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888091087341309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874434947967529}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 303.013}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.28521866630473, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944365501403809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.50773415730862, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958274841308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953979015350342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.4373309879558, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 313.024}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947924613952637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.71389935178279, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85654878616333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895290374755859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.55101733350438, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912010192871094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 275.078}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.75981243667965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968919277191162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984976768493652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.40326194176092, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878252029418945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.78453139545987, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996346950531006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 289.836}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023502349853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.41143495105138, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968320369720459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.54531445673496, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947745323181152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955850124359131}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 294.827}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.35007906474922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 73, "power": 323.782}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712675581.25681, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/davit_large-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/davit_large-multi.0.data new file mode 100644 index 000000000..03d23c24d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/davit_large-multi.0.data @@ -0,0 +1,255 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.71, "memory": 0.010771942138671876}, "temperature": 65, "power": 88.296, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676546.637992, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712676546.6536248}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2154.375, 81920.0], "load": 0, "temperature": 61, "power": 78.757}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2242937088012695}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.267s, 39.18/s (3.267s, 39.18/s) LR: 1.000e-05 Data: 0.736 (0.736)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.176398277282715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 66, "power": 270.213}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255929470062256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.163320541381836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 66, "power": 262.402}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.234607696533203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.243466377258301}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.37316522131647, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0485429763793945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 66, "power": 305.972}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.2289143695566, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.246738433837891}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.7931441167156, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.151193141937256}, "pipe": "data"} +{"event": "data", "data": {"rate": 279.37390339536125, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.95, "temperature": 68, "power": 317.751}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.270837306976318}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.08658886170474, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 280.47315646199417, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.312950611114502}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.3874501714779, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 69, "power": 308.617}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311519622802734}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.312 (7.24) Time: 0.412s, 310.57/s (0.508s, 251.77/s) LR: 1.000e-05 Data: 0.000 (0.030)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.701 (0.701) Loss: 7.1174 (7.1174) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.339 (0.178) Loss: 7.0505 (7.2336) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large-multi.0/20240409-152912-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 310.3800218544878, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.93, "temperature": 69, "power": 306.884}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10182.375, 81920.0], "load": 0.99, "temperature": 70, "power": 301.029}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413557052612305}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.922s, 138.82/s (0.922s, 138.82/s) LR: 1.008e-03 Data: 0.509 (0.509)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32496.375, 81920.0], "load": 0.49, "temperature": 68, "power": 278.515}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087121963500977}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.5530158355048, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.90281474560675, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022743225097656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32570.375, 81920.0], "load": 0.94, "temperature": 70, "power": 297.713}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.9529978266943, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015771865844727}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.42571738625753, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00472354888916}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.22851540391065, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32570.375, 81920.0], "load": 0.96, "temperature": 70, "power": 279.485}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.067629814147949}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.12789359615977, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943271636962891}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.01387738877946, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.9099753868893, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032142162322998}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32570.375, 81920.0], "load": 0.98, "temperature": 70, "power": 283.734}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.853598063259, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978492259979248}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.73639276489496, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981564998626709}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.7049045513416, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32570.375, 81920.0], "load": 0.97, "temperature": 70, "power": 247.884}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016761779785156}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.76462856421983, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.412s, 310.37/s (0.436s, 293.70/s) LR: 1.008e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.579 (0.579) Loss: 6.8693 (6.8693) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.7070 (6.8682) Acc@1: 0.0000 ( 0.2907) Acc@5: 3.1250 ( 1.3081)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large-multi.0/20240409-152912-davit_large-224/checkpoint-1.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.65609828385857, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32814.375, 81920.0], "load": 0.96, "temperature": 71, "power": 325.07}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32814.375, 81920.0], "load": 0.94, "temperature": 72, "power": 312.226}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32814.375, 81920.0], "load": 0, "temperature": 66, "power": 84.562}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885795593261719}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.933s, 137.23/s (0.933s, 137.23/s) LR: 2.006e-03 Data: 0.519 (0.519)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 256.50295355636285, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914775848388672}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.4687042799191, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931666851043701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33058.375, 81920.0], "load": 0.97, "temperature": 71, "power": 329.482}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.24255494873523, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.940074920654297}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.4625388991043, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.56231999141966, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993440628051758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33058.375, 81920.0], "load": 1.0, "temperature": 72, "power": 271.089}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.8119851401167, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912421226501465}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.16753585328314, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013060569763184}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.3705395029664, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33058.375, 81920.0], "load": 0.99, "temperature": 72, "power": 310.532}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895408630371094}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.3154326183247, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973190784454346}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.36646894326464, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.0332041479887, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.093724727630615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33058.375, 81920.0], "load": 0.96, "temperature": 72, "power": 300.256}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.31681278445427, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032838821411133}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.946 (6.95) Time: 0.413s, 309.59/s (0.437s, 293.16/s) LR: 2.006e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 278.8758237974133, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.592 (0.592) Loss: 6.7520 (6.7520) Acc@1: 0.0000 ( 0.0000) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.4894 (6.8228) Acc@1: 0.0000 ( 0.2180) Acc@5: 6.2500 ( 1.0417)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.55366803647956, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33302.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.581}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33302.375, 81920.0], "load": 0.97, "temperature": 73, "power": 284.342}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33302.375, 81920.0], "load": 0.03, "temperature": 67, "power": 87.065}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859885215759277}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.860 (6.86) Time: 0.912s, 140.32/s (0.912s, 140.32/s) LR: 3.004e-03 Data: 0.500 (0.500)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 262.29914921652073, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.857763290405273}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.6835825658188, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914639472961426}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33546.375, 81920.0], "load": 0.98, "temperature": 72, "power": 253.202}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.04075485396606, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.9015966478128, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8981032371521}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.36128231516557, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90637731552124}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33546.375, 81920.0], "load": 1.0, "temperature": 72, "power": 215.331}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.0361222834086, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973822116851807}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.7445747053766, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901671409606934}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.2325256558646, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33546.375, 81920.0], "load": 0.96, "temperature": 73, "power": 224.717}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994017601013184}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.0548797227454, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.80420952208, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034119606018066}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.2194434530632, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96621561050415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33546.375, 81920.0], "load": 0.97, "temperature": 74, "power": 302.07}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.5578842305458, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.014098167419434}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.000 (6.96) Time: 0.414s, 309.15/s (0.437s, 293.23/s) LR: 3.004e-03 Data: 0.000 (0.022)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 308.9342103456384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.567 (0.567) Loss: 6.8269 (6.8269) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.3619 (6.8160) Acc@1: 0.0000 ( 0.1211) Acc@5: 6.2500 ( 0.9690)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 271.99844882684613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33790.375, 81920.0], "load": 0.96, "temperature": 73, "power": 321.579}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33790.375, 81920.0], "load": 0.95, "temperature": 74, "power": 312.944}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33790.375, 81920.0], "load": 0.03, "temperature": 69, "power": 88.427}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870382308959961}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.870 (6.87) Time: 0.906s, 141.21/s (0.906s, 141.21/s) LR: 4.002e-03 Data: 0.493 (0.493)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 279.77120376527455, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82788610458374}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.08551253647704, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34034.375, 81920.0], "load": 1.0, "temperature": 73, "power": 264.186}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.871139049530029}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.3262840965321, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915597915649414}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.09781484670873, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914789199829102}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.7120335623567, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34034.375, 81920.0], "load": 0.96, "temperature": 74, "power": 306.283}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.1226389254549, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990839004516602}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.2040820016732, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.044808864593506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34034.375, 81920.0], "load": 0.96, "temperature": 74, "power": 332.21}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.3057674088407, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.072300910949707}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.8139183540822, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011628150939941}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.8255012422864, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34034.375, 81920.0], "load": 1.0, "temperature": 74, "power": 275.03}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.044508934020996}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.53096869564695, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.9552362706042, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712676654.9128664, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/davit_large.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/davit_large.D0.data new file mode 100644 index 000000000..a3f603e2d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/davit_large.D0.data @@ -0,0 +1,255 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.17, "memory": 0.010771942138671876}, "temperature": 59, "power": 82.151, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676436.376237, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712676436.3917747}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2326.375, 81920.0], "load": 0, "temperature": 55, "power": 73.656}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.224214553833008}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.157s, 40.54/s (3.157s, 40.54/s) LR: 1.000e-05 Data: 0.666 (0.666)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.176412582397461}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 59, "power": 274.09}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255987167358398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16331672668457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 61, "power": 319.967}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.234622955322266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.24342155456543}, "pipe": "data"} +{"event": "data", "data": {"rate": 286.02172203587634, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048562526702881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 62, "power": 317.346}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 312.7925083418219, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2468743324279785}, "pipe": "data"} +{"event": "data", "data": {"rate": 294.0299603252281, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.151241302490234}, "pipe": "data"} +{"event": "data", "data": {"rate": 302.1485478730167, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 62, "power": 282.435}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2709760665893555}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.04177961030547, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 291.1861787133538, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.312800407409668}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.79004052610384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.3113322257995605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 62, "power": 309.235}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.410s, 312.28/s (0.502s, 254.85/s) LR: 1.000e-05 Data: 0.000 (0.028)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.704 (0.704) Loss: 7.1173 (7.1173) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.334 (0.177) Loss: 7.0511 (7.2336) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large.D0/20240409-152722-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 312.3539844258595, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.97, "temperature": 63, "power": 326.722}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4642.375, 81920.0], "load": 0.97, "temperature": 62, "power": 190.302}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413674354553223}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.911s, 140.47/s (0.911s, 140.47/s) LR: 1.008e-03 Data: 0.501 (0.501)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.97, "temperature": 62, "power": 249.176}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087172031402588}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.1977281723016, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 281.2705846668804, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022671699523926}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.99, "temperature": 63, "power": 251.684}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.57096181677946, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015777587890625}, "pipe": "data"} +{"event": "data", "data": {"rate": 284.79817326891686, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.004734516143799}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.5308374015921, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.067770957946777}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.99, "temperature": 64, "power": 299.981}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 282.93758770049794, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9432783126831055}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.25526012270797, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 283.06646775836606, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032247543334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 64, "power": 296.558}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.0801386856677, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978431701660156}, "pipe": "data"} +{"event": "data", "data": {"rate": 285.53923136321157, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981602668762207}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.697856608943, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.99, "temperature": 65, "power": 278.939}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016844749450684}, "pipe": "data"} +{"event": "data", "data": {"rate": 289.9529798543059, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.410s, 311.82/s (0.432s, 295.98/s) LR: 1.008e-03 Data: 0.000 (0.022)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.610 (0.610) Loss: 6.8691 (6.8691) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.7075 (6.8682) Acc@1: 0.0000 ( 0.2665) Acc@5: 3.1250 ( 1.2597)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large.D0/20240409-152722-davit_large-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 311.74054417073813, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 65, "power": 278.61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.96, "temperature": 65, "power": 300.534}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88624906539917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32966.375, 81920.0], "load": 0.28, "temperature": 62, "power": 102.161}}}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.919s, 139.27/s (0.919s, 139.27/s) LR: 2.006e-03 Data: 0.508 (0.508)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 299.4676588030203, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914651393890381}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.4624207867216, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931358814239502}, "pipe": "data"} +{"event": "data", "data": {"rate": 281.1566482705762, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 1.0, "temperature": 65, "power": 314.648}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.941059112548828}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.8126434365565, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 280.4581242266956, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992847442626953}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.12521167300474, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.96, "temperature": 65, "power": 301.953}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918248176574707}, "pipe": "data"} +{"event": "data", "data": {"rate": 280.97875106426875, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016817092895508}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.0125243885047, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894266128540039}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.29856450078216, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 1.0, "temperature": 65, "power": 298.773}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.974291801452637}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.24463846624866, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 286.0447146081154, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.100069999694824}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 65, "power": 215.203}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.44904546017773, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03318452835083}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.953 (6.96) Time: 0.411s, 311.80/s (0.433s, 295.38/s) LR: 2.006e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 288.7844378389191, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.582 (0.582) Loss: 6.7466 (6.7466) Acc@1: 0.0000 ( 0.0000) Acc@5: 3.9062 ( 3.9062)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.042 (0.165) Loss: 6.5149 (6.8255) Acc@1: 0.0000 ( 0.1938) Acc@5: 6.2500 ( 1.1870)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 311.75706753332986, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.96, "temperature": 66, "power": 315.343}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.97, "temperature": 67, "power": 261.696}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0, "temperature": 61, "power": 78.935}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87822151184082}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.878 (6.88) Time: 0.929s, 137.74/s (0.929s, 137.74/s) LR: 3.004e-03 Data: 0.519 (0.519)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 246.98631447673736, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.851591110229492}, "pipe": "data"} +{"event": "data", "data": {"rate": 279.5437749499644, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912532806396484}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.92425847898284, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.98, "temperature": 65, "power": 275.297}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 280.86647641502975, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895297527313232}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.9109908048949, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.910676956176758}, "pipe": "data"} +{"event": "data", "data": {"rate": 280.2216716739634, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.99, "temperature": 66, "power": 339.693}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976223468780518}, "pipe": "data"} +{"event": "data", "data": {"rate": 311.36971400154175, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902081489562988}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.09708761054304, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.997932434082031}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.1640439778011, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 67, "power": 276.111}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 280.53680366710694, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035996913909912}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.74508533827196, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98294734954834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 67, "power": 296.461}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 279.1795216158785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.014934539794922}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 6.994 (6.96) Time: 0.411s, 311.34/s (0.434s, 294.66/s) LR: 3.004e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.27534022611934, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.589 (0.589) Loss: 6.8195 (6.8195) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.042 (0.165) Loss: 6.3625 (6.8153) Acc@1: 0.0000 ( 0.1938) Acc@5: 12.5000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 277.7018316301605, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.97, "temperature": 67, "power": 199.047}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.97, "temperature": 67, "power": 310.408}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0, "temperature": 62, "power": 80.829}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.863361358642578}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.863 (6.86) Time: 0.927s, 138.13/s (0.927s, 138.13/s) LR: 4.002e-03 Data: 0.516 (0.516)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 282.2410795940031, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827474594116211}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.05101841703066, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884098052978516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.97, "temperature": 67, "power": 283.366}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.3608702834013, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912851333618164}, "pipe": "data"} +{"event": "data", "data": {"rate": 279.5411644522195, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912323951721191}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.6239581913095, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.98, "temperature": 68, "power": 332.325}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 279.60279063987326, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.997890472412109}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.7544682808036, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043225288391113}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.6342776580607, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.95, "temperature": 69, "power": 336.454}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0753374099731445}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.53892489987805, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.008981704711914}, "pipe": "data"} +{"event": "data", "data": {"rate": 279.16128304314066, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.045340538024902}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.6451246169102, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.97, "temperature": 69, "power": 310.292}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 279.51320630731203, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712676544.0184379, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/dlrm.0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/dlrm.0.data new file mode 100644 index 000000000..2c9021198 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/dlrm.0.data @@ -0,0 +1,282 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 90.489, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676954.49083, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712676954.50619}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"} +{"event": "line", "data": "Using 1 GPU(s)...\n", "pipe": "stdout"} +{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 62, "power": 53.561}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 61, "power": 52.732}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3298.375, 81920.0], "load": 0, "temperature": 61, "power": 78.806}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08395528793334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5700.375, 81920.0], "load": 0, "temperature": 60, "power": 78.212}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08342313766479492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5702.375, 81920.0], "load": 0, "temperature": 60, "power": 77.685}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0849374458193779}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6106.375, 81920.0], "load": 0, "temperature": 59, "power": 183.982}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 416688.88846331555, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6106.375, 81920.0], "load": 0, "temperature": 58, "power": 76.844}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08372959494590759}, "pipe": "data"} +{"event": "data", "data": {"rate": 412866.2051925888, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 58, "power": 75.925}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08384181559085846}, "pipe": "data"} +{"event": "data", "data": {"rate": 419032.6876954545, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 57, "power": 75.536}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08388040214776993}, "pipe": "data"} +{"event": "data", "data": {"rate": 413702.0836959918, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 57, "power": 75.147}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08414746820926666}, "pipe": "data"} +{"event": "data", "data": {"rate": 417301.6674110296, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 56, "power": 74.657}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 55, "power": 74.315}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08342362195253372}, "pipe": "data"} +{"event": "data", "data": {"rate": 420173.49800797144, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 55, "power": 74.085}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08308812975883484}, "pipe": "data"} +{"event": "data", "data": {"rate": 416241.0108620256, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 55, "power": 73.634}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08345107734203339}, "pipe": "data"} +{"event": "data", "data": {"rate": 417677.2728243778, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 54, "power": 73.315}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08459055423736572}, "pipe": "data"} +{"event": "data", "data": {"rate": 419187.48856342724, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 54, "power": 73.039}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 53, "power": 72.824}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08355697244405746}, "pipe": "data"} +{"event": "data", "data": {"rate": 414384.51468539616, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 53, "power": 72.181}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08368614315986633}, "pipe": "data"} +{"event": "data", "data": {"rate": 418252.9423471608, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 52, "power": 71.814}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08400876820087433}, "pipe": "data"} +{"event": "data", "data": {"rate": 420995.94989253994, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 52, "power": 70.751}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08406656980514526}, "pipe": "data"} +{"event": "data", "data": {"rate": 410838.5343876918, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 51, "power": 70.25}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 51, "power": 70.102}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08438296616077423}, "pipe": "data"} +{"event": "data", "data": {"rate": 416346.3513737549, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 50, "power": 69.913}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08476312458515167}, "pipe": "data"} +{"event": "data", "data": {"rate": 415952.19311961654, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 50, "power": 69.455}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08354128897190094}, "pipe": "data"} +{"event": "data", "data": {"rate": 413826.99445213185, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 49, "power": 69.064}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08408722281455994}, "pipe": "data"} +{"event": "data", "data": {"rate": 411921.7454278861, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 49, "power": 68.905}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 48, "power": 68.689}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08347434550523758}, "pipe": "data"} +{"event": "data", "data": {"rate": 414256.57531176443, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 48, "power": 68.563}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08404223620891571}, "pipe": "data"} +{"event": "data", "data": {"rate": 419456.52576581604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 47, "power": 68.155}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08309965580701828}, "pipe": "data"} +{"event": "data", "data": {"rate": 416256.1332930187, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 47, "power": 67.692}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08461501449346542}, "pipe": "data"} +{"event": "data", "data": {"rate": 413668.4394436392, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 47, "power": 67.756}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 46, "power": 67.52}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08360610902309418}, "pipe": "data"} +{"event": "data", "data": {"rate": 412374.4553354973, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 46, "power": 67.53}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08310449123382568}, "pipe": "data"} +{"event": "data", "data": {"rate": 415140.17011219834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 45, "power": 67.291}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08348263800144196}, "pipe": "data"} +{"event": "data", "data": {"rate": 414601.0001844245, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 45, "power": 67.2}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08347773551940918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 45, "power": 66.816}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 419189.25819338585, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 44, "power": 66.825}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0839482992887497}, "pipe": "data"} +{"event": "data", "data": {"rate": 414097.0525813279, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 44, "power": 66.69}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08302360773086548}, "pipe": "data"} +{"event": "data", "data": {"rate": 416215.01976830815, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 44, "power": 66.478}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08247873932123184}, "pipe": "data"} +{"event": "data", "data": {"rate": 417567.6820148701, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 43, "power": 66.19}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08412615209817886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 43, "power": 66.167}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 419918.1123799185, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 43, "power": 65.66}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08320017158985138}, "pipe": "data"} +{"event": "data", "data": {"rate": 415693.15755717043, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 43, "power": 65.792}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08296072483062744}, "pipe": "data"} +{"event": "data", "data": {"rate": 418208.12419311027, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 42, "power": 65.799}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08382801711559296}, "pipe": "data"} +{"event": "data", "data": {"rate": 418956.5889221424, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 42, "power": 65.424}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08423185348510742}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0.03, "temperature": 42, "power": 64.753}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 415889.79245413863, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 42, "power": 64.446}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08362126350402832}, "pipe": "data"} +{"event": "data", "data": {"rate": 421587.7203518899, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 41, "power": 64.657}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08386819064617157}, "pipe": "data"} +{"event": "data", "data": {"rate": 414447.11442081945, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 41, "power": 64.636}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08274102210998535}, "pipe": "data"} +{"event": "data", "data": {"rate": 422632.57272367965, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 41, "power": 64.299}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08267556130886078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 41, "power": 64.398}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 415192.4029318604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 41, "power": 64.279}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0836000144481659}, "pipe": "data"} +{"event": "data", "data": {"rate": 411268.0724567793, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 40, "power": 64.077}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08373723179101944}, "pipe": "data"} +{"event": "data", "data": {"rate": 421843.92500848824, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 40, "power": 63.939}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0839206874370575}, "pipe": "data"} +{"event": "data", "data": {"rate": 423744.430175083, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 40, "power": 64.237}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08366947621107101}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 40, "power": 64.322}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 419273.906852815, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 40, "power": 63.896}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08206789195537567}, "pipe": "data"} +{"event": "data", "data": {"rate": 419008.14705385535, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 40, "power": 64.197}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08495134860277176}, "pipe": "data"} +{"event": "data", "data": {"rate": 413408.22353838524, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 39, "power": 63.761}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0824621394276619}, "pipe": "data"} +{"event": "data", "data": {"rate": 414804.5947305436, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 39, "power": 63.708}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08413124084472656}, "pipe": "data"} +{"event": "data", "data": {"rate": 416930.1320373755, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 39, "power": 64.171}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08249908685684204}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 41, "power": 63.374}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 420352.2317717118, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 39, "power": 63.22}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08293187618255615}, "pipe": "data"} +{"event": "data", "data": {"rate": 413927.7386862867, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 39, "power": 63.6}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08295343071222305}, "pipe": "data"} +{"event": "data", "data": {"rate": 415675.32347400044, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 39, "power": 63.358}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08349806070327759}, "pipe": "data"} +{"event": "data", "data": {"rate": 416695.1940781265, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 39, "power": 63.317}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08397036790847778}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.18, "temperature": 39, "power": 63.26}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 420817.9762425855, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 63.235}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08261168003082275}, "pipe": "data"} +{"event": "data", "data": {"rate": 420822.29971886793, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 63.162}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08328827470541}, "pipe": "data"} +{"event": "data", "data": {"rate": 415250.879303368, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 62.974}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08417266607284546}, "pipe": "data"} +{"event": "data", "data": {"rate": 414908.3792955451, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 63.276}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08396507054567337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.18, "temperature": 38, "power": 62.999}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 420534.2586935076, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 62.984}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08414334058761597}, "pipe": "data"} +{"event": "data", "data": {"rate": 419018.33803680394, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 62.959}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0827147513628006}, "pipe": "data"} +{"event": "data", "data": {"rate": 420521.72722112807, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 62.829}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08464237302541733}, "pipe": "data"} +{"event": "data", "data": {"rate": 415745.6140284101, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 38, "power": 62.862}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08371055126190186}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.17, "temperature": 38, "power": 63.155}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 418359.32515760546, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 37, "power": 62.846}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08391377329826355}, "pipe": "data"} +{"event": "data", "data": {"rate": 412665.90131381096, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 37, "power": 62.983}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08393114805221558}, "pipe": "data"} +{"event": "data", "data": {"rate": 415864.34131558164, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 37, "power": 62.793}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08339859545230865}, "pipe": "data"} +{"event": "data", "data": {"rate": 418010.0486717389, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 37, "power": 62.976}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 37, "power": 62.878}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712677205.303504, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/focalnet.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/focalnet.D0.data new file mode 100644 index 000000000..477f17569 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/focalnet.D0.data @@ -0,0 +1,264 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.41, "memory": 0.010771942138671876}, "temperature": 71, "power": 95.24, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676657.51347, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712676657.5295565}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004467010498047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4344.375, 81920.0], "load": 1.0, "temperature": 71, "power": 264.03}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 11.484s, 11.15/s (11.484s, 11.15/s) LR: 1.000e-05 Data: 0.685 (0.685)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [21814.375, 81920.0], "load": 1.0, "temperature": 68, "power": 148.34}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11438.375, 81920.0], "load": 1.0, "temperature": 69, "power": 215.524}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7618.375, 81920.0], "load": 1.0, "temperature": 68, "power": 187.357}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006728649139404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935497760772705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23794.375, 81920.0], "load": 0.95, "temperature": 71, "power": 297.649}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995425701141357}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.060293197631836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23798.375, 81920.0], "load": 0.96, "temperature": 71, "power": 302.359}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056240081787109}, "pipe": "data"} +{"event": "data", "data": {"rate": 377.14681247005285, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029513359069824}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.30145990124583, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975472450256348}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.77115978519004, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23798.375, 81920.0], "load": 0.95, "temperature": 71, "power": 288.519}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 363.8052247271533, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.069397926330566}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.321s, 398.32/s (0.678s, 188.73/s) LR: 1.000e-05 Data: 0.000 (0.029)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.698 (0.698) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.990 (0.160) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/focalnet.D0/20240409-153102-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 397.4883040134017, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24042.375, 81920.0], "load": 0.89, "temperature": 71, "power": 302.113}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24042.375, 81920.0], "load": 0.96, "temperature": 72, "power": 306.559}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020679950714111}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 0.849s, 150.70/s (0.849s, 150.70/s) LR: 1.001e-02 Data: 0.525 (0.525)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 383.68139825683403, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23878.375, 81920.0], "load": 0.98, "temperature": 71, "power": 329.007}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.986495018005371}, "pipe": "data"} +{"event": "data", "data": {"rate": 372.89610922705026, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 359.05037436920253, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.065654754638672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23882.375, 81920.0], "load": 0.99, "temperature": 72, "power": 274.907}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.7272478498257, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077932357788086}, "pipe": "data"} +{"event": "data", "data": {"rate": 379.88182835917075, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056328773498535}, "pipe": "data"} +{"event": "data", "data": {"rate": 377.5619146626419, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23882.375, 81920.0], "load": 0.97, "temperature": 72, "power": 297.792}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036379337310791}, "pipe": "data"} +{"event": "data", "data": {"rate": 391.9728368621986, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.66297948850735, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.098394393920898}, "pipe": "data"} +{"event": "data", "data": {"rate": 359.1547094134149, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.144105911254883}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23882.375, 81920.0], "load": 0.96, "temperature": 73, "power": 301.427}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.72734560535355, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.322s, 398.02/s (0.346s, 370.34/s) LR: 1.001e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.559 (0.559) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.9393 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/focalnet.D0/20240409-153102-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 397.8027390570853, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24126.375, 81920.0], "load": 0.93, "temperature": 73, "power": 305.31}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24126.375, 81920.0], "load": 0.03, "temperature": 68, "power": 87.462}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995872497558594}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 0.819s, 156.36/s (0.819s, 156.36/s) LR: 2.001e-02 Data: 0.497 (0.497)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 393.79404883512296, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097558975219727}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.58258753196174, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24370.375, 81920.0], "load": 0.96, "temperature": 72, "power": 225.831}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0096845626831055}, "pipe": "data"} +{"event": "data", "data": {"rate": 357.32310923728306, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.010942459106445}, "pipe": "data"} +{"event": "data", "data": {"rate": 390.4976926040481, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141146659851074}, "pipe": "data"} +{"event": "data", "data": {"rate": 372.7775852224069, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24370.375, 81920.0], "load": 0.98, "temperature": 73, "power": 268.436}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 391.1205375144287, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.252418041229248}, "pipe": "data"} +{"event": "data", "data": {"rate": 374.0523343926758, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.19024133682251}, "pipe": "data"} +{"event": "data", "data": {"rate": 361.1389649203707, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24370.375, 81920.0], "load": 0.99, "temperature": 73, "power": 275.308}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.219806671142578}, "pipe": "data"} +{"event": "data", "data": {"rate": 397.0465324554106, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.261203765869141}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.4730459007951, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 7.261 (7.13) Time: 0.324s, 394.96/s (0.344s, 371.83/s) LR: 2.001e-02 Data: 0.000 (0.022)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.542 (0.542) Loss: 6.9301 (6.9301) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.126) Loss: 6.7457 (7.1150) Acc@1: 0.0000 ( 0.1696) Acc@5: 0.0000 ( 0.8479)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 394.9616287834952, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24614.375, 81920.0], "load": 0.94, "temperature": 72, "power": 249.288}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24614.375, 81920.0], "load": 0.97, "temperature": 73, "power": 312.643}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089291095733643}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 7.089 (7.09) Time: 0.823s, 155.46/s (0.823s, 155.46/s) LR: 3.000e-02 Data: 0.502 (0.502)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 397.555071098811, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231501579284668}, "pipe": "data"} +{"event": "data", "data": {"rate": 382.15443825989075, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24858.375, 81920.0], "load": 0.96, "temperature": 73, "power": 205.602}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 376.3459107385317, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0426130294799805}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.986974482912, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.210793495178223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24858.375, 81920.0], "load": 0.96, "temperature": 73, "power": 326.419}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.93244914980266, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.3732404708862305}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.57473547448944, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.324423789978027}, "pipe": "data"} +{"event": "data", "data": {"rate": 392.78879179883023, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24858.375, 81920.0], "load": 0.99, "temperature": 73, "power": 288.878}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.279972076416016}, "pipe": "data"} +{"event": "data", "data": {"rate": 369.941465558819, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 390.51489119925634, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.347404479980469}, "pipe": "data"} +{"event": "data", "data": {"rate": 374.1754123965538, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.239471435546875}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.239 (7.23) Time: 0.322s, 398.00/s (0.344s, 371.85/s) LR: 3.000e-02 Data: 0.000 (0.022)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24858.375, 81920.0], "load": 0.99, "temperature": 73, "power": 299.884}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.538 (0.538) Loss: 7.1712 (7.1712) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.126) Loss: 6.2444 (7.1748) Acc@1: 0.0000 ( 0.2180) Acc@5: 25.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 398.34483158110197, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25102.375, 81920.0], "load": 0.93, "temperature": 73, "power": 297.146}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.215520858764648}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 7.216 (7.22) Time: 0.841s, 152.27/s (0.841s, 152.27/s) LR: 4.000e-02 Data: 0.520 (0.520)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25272.375, 81920.0], "load": 0, "temperature": 71, "power": 341.498}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.95527413693515, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.295672416687012}, "pipe": "data"} +{"event": "data", "data": {"rate": 397.1141620768245, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311964988708496}, "pipe": "data"} +{"event": "data", "data": {"rate": 378.76365907568277, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25346.375, 81920.0], "load": 0.96, "temperature": 72, "power": 306.728}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.6532007292009, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.187459945678711}, "pipe": "data"} +{"event": "data", "data": {"rate": 384.2980708068715, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.408051490783691}, "pipe": "data"} +{"event": "data", "data": {"rate": 374.21391251470453, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25346.375, 81920.0], "load": 0.95, "temperature": 72, "power": 300.052}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.321699142456055}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.63318569028473, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.464118003845215}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.8558019997688, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.367799758911133}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.22613074345924, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25346.375, 81920.0], "load": 0.95, "temperature": 72, "power": 299.992}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.7494433822282, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.404073715209961}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.404 (7.33) Time: 0.322s, 397.08/s (0.345s, 371.21/s) LR: 4.000e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.538 (0.538) Loss: 6.9695 (6.9695) Acc@1: 0.0000 ( 0.0000) Acc@5: 4.6875 ( 4.6875)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.125) Loss: 6.8203 (7.2576) Acc@1: 0.0000 ( 0.1211) Acc@5: 3.1250 ( 0.7025)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 357.44762163267814, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25590.375, 81920.0], "load": 0.93, "temperature": 72, "power": 219.538}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25590.375, 81920.0], "load": 0.04, "temperature": 67, "power": 86.765}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.211348533630371}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 7.211 (7.21) Time: 0.831s, 154.11/s (0.831s, 154.11/s) LR: 4.997e-02 Data: 0.510 (0.510)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 393.9797826966034, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.400300025939941}, "pipe": "data"} +{"event": "data", "data": {"rate": 397.69098285135897, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25834.375, 81920.0], "load": 0.99, "temperature": 71, "power": 201.621}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.457738399505615}, "pipe": "data"} +{"event": "data", "data": {"rate": 377.6524500968823, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.70676326751709}, "pipe": "data"} +{"event": "data", "data": {"rate": 385.70176168536364, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 386.4323715122176, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25834.375, 81920.0], "load": 0.99, "temperature": 71, "power": 316.425}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.551183223724365}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.284545717194, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.540083885192871}, "pipe": "data"} +{"event": "data", "data": {"rate": 367.8090433376149, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.490669250488281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25834.375, 81920.0], "load": 0.96, "temperature": 71, "power": 296.383}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.33608009055445, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.45649528503418}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.6745187659452, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 31/32 (100%)] Loss: 7.408 (7.45) Time: 0.320s, 400.44/s (0.344s, 372.15/s) LR: 4.997e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 375.6656259274429, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.538 (0.538) Loss: 7.3016 (7.3016) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.125) Loss: 6.8042 (7.2021) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26078.375, 81920.0], "load": 0.15, "temperature": 70, "power": 307.268}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 400.3132200751325, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26078.375, 81920.0], "load": 0.94, "temperature": 71, "power": 301.058}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.191986083984375}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/32 ( 0%)] Loss: 7.192 (7.19) Time: 0.838s, 152.77/s (0.838s, 152.77/s) LR: 4.995e-02 Data: 0.517 (0.517)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26322.375, 81920.0], "load": 0.95, "temperature": 70, "power": 293.222}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 398.46011601639447, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712676773.5042932, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/fp16.D0.data new file mode 100644 index 000000000..65d4d6fd9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/fp16.D0.data @@ -0,0 +1,141 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 68, "power": 91.665, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675142.868991, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712675142.8795474}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 222.46256349896146, "units": "Tflops", "t": 1712675144.829849}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 65, "power": 55.194}}, "t": 1712675144.1655538}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0.05, "temperature": 69, "power": 255.2}}, "t": 1712675144.6820068}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.07571838358504, "units": "Tflops", "t": 1712675145.0902915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.83902375274909, "units": "Tflops", "t": 1712675145.3482106}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 303.192}}, "t": 1712675145.1916387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.97100970780446, "units": "Tflops", "t": 1712675145.6081107}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.26628429447115, "units": "Tflops", "t": 1712675145.8717678}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 274.085}}, "t": 1712675145.698911}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.11878658606307, "units": "Tflops", "t": 1712675146.1356556}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.943738992125, "units": "Tflops", "t": 1712675146.395519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 231.847}}, "t": 1712675146.2098362}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.94252287455885, "units": "Tflops", "t": 1712675146.6564398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.09998393459665, "units": "Tflops", "t": 1712675146.9161303}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 274.427}}, "t": 1712675146.7184474}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.63617282209066, "units": "Tflops", "t": 1712675147.1753476}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.69913129656192, "units": "Tflops", "t": 1712675147.4375124}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 298.517}}, "t": 1712675147.2270117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.04962213082888, "units": "Tflops", "t": 1712675147.7014656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.74416524356243, "units": "Tflops", "t": 1712675147.9615164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 246.018}}, "t": 1712675147.7351673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.1710995786912, "units": "Tflops", "t": 1712675148.2201538}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.5755423504938, "units": "Tflops", "t": 1712675148.478331}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 239.984}}, "t": 1712675148.2422655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.24155022215146, "units": "Tflops", "t": 1712675148.7399685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.17363142320835, "units": "Tflops", "t": 1712675149.0026724}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 285.547}}, "t": 1712675148.7513824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.80734923714047, "units": "Tflops", "t": 1712675149.261682}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.65809846663845, "units": "Tflops", "t": 1712675149.5218282}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 235.891}}, "t": 1712675149.2730494}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6945345429907, "units": "Tflops", "t": 1712675149.780971}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.56468392188387, "units": "Tflops", "t": 1712675150.0391695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 268.823}}, "t": 1712675149.7815487}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.48590773192285, "units": "Tflops", "t": 1712675150.2995403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 244.356}}, "t": 1712675150.290192}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.99340236355675, "units": "Tflops", "t": 1712675150.5604005}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.59708433315222, "units": "Tflops", "t": 1712675150.8216286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 297.939}}, "t": 1712675150.7998822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.83611182469812, "units": "Tflops", "t": 1712675151.0816236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.43106446415985, "units": "Tflops", "t": 1712675151.3409736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 255.382}}, "t": 1712675151.3126838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.87384083327822, "units": "Tflops", "t": 1712675151.6009367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.84613925179357, "units": "Tflops", "t": 1712675151.8619053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 301.17}}, "t": 1712675151.8230698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.52144213023388, "units": "Tflops", "t": 1712675152.122225}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.38475013090903, "units": "Tflops", "t": 1712675152.381636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 254.742}}, "t": 1712675152.333546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.7997625108793, "units": "Tflops", "t": 1712675152.6396377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.1016173593225, "units": "Tflops", "t": 1712675152.8993251}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 239.725}}, "t": 1712675152.8434098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.22335049171406, "units": "Tflops", "t": 1712675153.157915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.5014245744299, "units": "Tflops", "t": 1712675153.4192395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 296.944}}, "t": 1712675153.3620944}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.33425175105126, "units": "Tflops", "t": 1712675153.6808019}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.22395052038357, "units": "Tflops", "t": 1712675153.9403777}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 288.485}}, "t": 1712675153.8707974}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.44718866698418, "units": "Tflops", "t": 1712675154.1987379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 256.6025126962007, "units": "Tflops", "t": 1712675154.4558845}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 244.424}}, "t": 1712675154.3779328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.37991377136723, "units": "Tflops", "t": 1712675154.7184217}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.40353482565192, "units": "Tflops", "t": 1712675154.9798484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 264.461}}, "t": 1712675154.8851478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.30685474092732, "units": "Tflops", "t": 1712675155.2424672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 256.4662318767959, "units": "Tflops", "t": 1712675155.499754}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 300.55}}, "t": 1712675155.393655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.86860653282815, "units": "Tflops", "t": 1712675155.7587016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.53919384569937, "units": "Tflops", "t": 1712675156.0169306}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 257.954}}, "t": 1712675155.9033494}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.51083725944375, "units": "Tflops", "t": 1712675156.2825077}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.68241860208448, "units": "Tflops", "t": 1712675156.5446825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 252.985}}, "t": 1712675156.4115171}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.68609504955938, "units": "Tflops", "t": 1712675156.8038275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.44176478928983, "units": "Tflops", "t": 1712675157.0621443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 301.435}}, "t": 1712675156.9205024}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.57120981482115, "units": "Tflops", "t": 1712675157.3244984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.7909762438834, "units": "Tflops", "t": 1712675157.5865712}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 257.504}}, "t": 1712675157.429132}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.0086669796112, "units": "Tflops", "t": 1712675157.8494992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.50050290519752, "units": "Tflops", "t": 1712675158.110825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 241.894}}, "t": 1712675157.9371147}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.9796958001263, "units": "Tflops", "t": 1712675158.3696733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.81096538172685, "units": "Tflops", "t": 1712675158.629654}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 273.937}}, "t": 1712675158.4457252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.12922015396506, "units": "Tflops", "t": 1712675158.8883545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.1797496446561, "units": "Tflops", "t": 1712675159.1489787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 290.233}}, "t": 1712675158.9546473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.21350016146636, "units": "Tflops", "t": 1712675159.4106565}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.20993412068194, "units": "Tflops", "t": 1712675159.6744015}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 242.838}}, "t": 1712675159.4632347}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.35248041406174, "units": "Tflops", "t": 1712675159.93388}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.05078197177883, "units": "Tflops", "t": 1712675160.194633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 254.38}}, "t": 1712675159.9723659}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.46499222504954, "units": "Tflops", "t": 1712675160.4540043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.41486013291518, "units": "Tflops", "t": 1712675160.716457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 300.564}}, "t": 1712675160.4812043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.01003319781384, "units": "Tflops", "t": 1712675160.9793792}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 256.4557730164089, "units": "Tflops", "t": 1712675161.2366767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 279.872}}, "t": 1712675160.989756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.82435850544772, "units": "Tflops", "t": 1712675161.4946778}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.32956920928677, "units": "Tflops", "t": 1712675161.7541351}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 238.398}}, "t": 1712675161.496837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.3272707254278, "units": "Tflops", "t": 1712675162.018865}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 290.139}}, "t": 1712675162.005559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.0579137992445, "units": "Tflops", "t": 1712675162.2849166}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.00877883120035, "units": "Tflops", "t": 1712675162.544691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 291.207}}, "t": 1712675162.5146532}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 256.29020887114524, "units": "Tflops", "t": 1712675162.8022358}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.12752161557498, "units": "Tflops", "t": 1712675163.061889}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 253.211}}, "t": 1712675163.0254722}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 256.1514139236208, "units": "Tflops", "t": 1712675163.3195553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.39706117583984, "units": "Tflops", "t": 1712675163.5851996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 234.081}}, "t": 1712675163.5342078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.11422549289924, "units": "Tflops", "t": 1712675163.8469791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.29407377647297, "units": "Tflops", "t": 1712675164.1095588}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 303.51}}, "t": 1712675164.0419586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.11888627666724, "units": "Tflops", "t": 1712675164.3693047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.59155022793695, "units": "Tflops", "t": 1712675164.6305346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 278.192}}, "t": 1712675164.5503454}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.83809155496013, "units": "Tflops", "t": 1712675164.8895073}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.01227459991352, "units": "Tflops", "t": 1712675165.151337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 243.141}}, "t": 1712675165.0589564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.5323097548228, "units": "Tflops", "t": 1712675165.4147675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.39246801549265, "units": "Tflops", "t": 1712675165.6741521}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 294.992}}, "t": 1712675165.5729632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.1737266848227, "units": "Tflops", "t": 1712675165.9348416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.7442024369476, "units": "Tflops", "t": 1712675166.1928527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 254.993}}, "t": 1712675166.0832725}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.47879990402436, "units": "Tflops", "t": 1712675166.4522054}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.11788222872744, "units": "Tflops", "t": 1712675166.7160227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 232.852}}, "t": 1712675166.5935948}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.89290062002735, "units": "Tflops", "t": 1712675166.9801588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.07786122724713, "units": "Tflops", "t": 1712675167.2408886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 289.74}}, "t": 1712675167.1094956}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.06713215744526, "units": "Tflops", "t": 1712675167.499654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 256.1952900870739, "units": "Tflops", "t": 1712675167.7572112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 72, "power": 290.236}}, "t": 1712675167.6182575}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.50757433710353, "units": "Tflops", "t": 1712675168.015516}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712675168.5660117, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/fp32.D0.data new file mode 100644 index 000000000..a026d95e2 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/fp32.D0.data @@ -0,0 +1,300 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 65, "power": 87.397, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675203.551036, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712675203.560864}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 18.731679137745452, "units": "Tflops", "t": 1712675206.440318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 62, "power": 53.233}}, "t": 1712675204.9024148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0.01, "temperature": 66, "power": 177.891}}, "t": 1712675205.410332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 68, "power": 289.443}}, "t": 1712675205.9167814}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 68, "power": 289.064}}, "t": 1712675206.424395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.065684840084106, "units": "Tflops", "t": 1712675207.5945344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 286.314}}, "t": 1712675206.930912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 280.458}}, "t": 1712675207.4408803}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.061583059941178, "units": "Tflops", "t": 1712675208.7483604}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 279.789}}, "t": 1712675207.950059}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 278.932}}, "t": 1712675208.4577475}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.066946068652246, "units": "Tflops", "t": 1712675209.901854}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 288.782}}, "t": 1712675208.9666634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 276.848}}, "t": 1712675209.474204}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.071890116711156, "units": "Tflops", "t": 1712675211.0550742}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 288.297}}, "t": 1712675209.9817305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 286.121}}, "t": 1712675210.4903524}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 284.942}}, "t": 1712675210.9968674}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.063899698654083, "units": "Tflops", "t": 1712675212.208819}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 279.406}}, "t": 1712675211.5042555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 286.721}}, "t": 1712675212.0107796}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.059790809213414, "units": "Tflops", "t": 1712675213.3627741}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 278.398}}, "t": 1712675212.5215454}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 290.82}}, "t": 1712675213.0280309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.06458534257464, "units": "Tflops", "t": 1712675214.516439}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 279.301}}, "t": 1712675213.5373816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 285.825}}, "t": 1712675214.0438652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.06309196305918, "units": "Tflops", "t": 1712675215.670188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 284.304}}, "t": 1712675214.5517879}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 281.058}}, "t": 1712675215.05831}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 289.045}}, "t": 1712675215.564724}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.060889753753436, "units": "Tflops", "t": 1712675216.8241198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 279.98}}, "t": 1712675216.0713432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 290.816}}, "t": 1712675216.577904}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.062717669772926, "units": "Tflops", "t": 1712675217.9778674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 290.514}}, "t": 1712675217.0844846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 282.804}}, "t": 1712675217.592093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.06009408949054, "units": "Tflops", "t": 1712675219.1317713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 284.775}}, "t": 1712675218.0986419}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 289.06}}, "t": 1712675218.6077414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 281.55}}, "t": 1712675219.116927}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.063749966887862, "units": "Tflops", "t": 1712675220.2855227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 279.902}}, "t": 1712675219.6234193}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 292.561}}, "t": 1712675220.1301932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.062221261351826, "units": "Tflops", "t": 1712675221.4392998}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 279.674}}, "t": 1712675220.6377275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 281.556}}, "t": 1712675221.1442657}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.061776091662253, "units": "Tflops", "t": 1712675222.5931308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 288.475}}, "t": 1712675221.6512654}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 286.534}}, "t": 1712675222.1617453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07043896600495, "units": "Tflops", "t": 1712675223.7464137}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 285.046}}, "t": 1712675222.670818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 291.897}}, "t": 1712675223.178759}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 281.129}}, "t": 1712675223.686411}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.067671350640342, "units": "Tflops", "t": 1712675224.899934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 290.44}}, "t": 1712675224.19292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 281.337}}, "t": 1712675224.7001703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.069153620211818, "units": "Tflops", "t": 1712675226.0532913}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 292.78}}, "t": 1712675225.2076201}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 282.896}}, "t": 1712675225.7149978}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.0671313256523, "units": "Tflops", "t": 1712675227.2067935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 287.015}}, "t": 1712675226.225109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 283.104}}, "t": 1712675226.7355392}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.066796289028765, "units": "Tflops", "t": 1712675228.3602967}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 287.203}}, "t": 1712675227.2422447}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 282.617}}, "t": 1712675227.752219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 280.165}}, "t": 1712675228.2587194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.075230968617362, "units": "Tflops", "t": 1712675229.5133562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 293.439}}, "t": 1712675228.7664149}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 282.122}}, "t": 1712675229.2752697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07832042436593, "units": "Tflops", "t": 1712675230.6661983}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 290.812}}, "t": 1712675229.7818048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 287.797}}, "t": 1712675230.2894464}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.078754527695814, "units": "Tflops", "t": 1712675231.8190022}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 289.568}}, "t": 1712675230.796782}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 282.508}}, "t": 1712675231.305113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 292.873}}, "t": 1712675231.8125236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07460767485667, "units": "Tflops", "t": 1712675232.9720707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 280.644}}, "t": 1712675232.3193643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 294.21}}, "t": 1712675232.828973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07117240044492, "units": "Tflops", "t": 1712675234.1253216}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 282.911}}, "t": 1712675233.3380609}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 291.501}}, "t": 1712675233.847119}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.068235058987117, "units": "Tflops", "t": 1712675235.2787454}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.178}}, "t": 1712675234.3580985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 285.945}}, "t": 1712675234.8660314}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07117240044492, "units": "Tflops", "t": 1712675236.431985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.807}}, "t": 1712675235.374325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 281.443}}, "t": 1712675235.880815}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.133}}, "t": 1712675236.3883624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.069894841421657, "units": "Tflops", "t": 1712675237.5853374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 284.381}}, "t": 1712675236.8960233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 285.854}}, "t": 1712675237.4033577}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.070095927324374, "units": "Tflops", "t": 1712675238.7386518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 289.066}}, "t": 1712675237.9107802}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 293.746}}, "t": 1712675238.418264}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.075424276856477, "units": "Tflops", "t": 1712675239.891633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 282.13}}, "t": 1712675238.9257927}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.192}}, "t": 1712675239.4335542}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.069457198527466, "units": "Tflops", "t": 1712675241.044999}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 283.591}}, "t": 1712675239.9400759}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 293.934}}, "t": 1712675240.45043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 292.661}}, "t": 1712675240.958064}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.074193483177368, "units": "Tflops", "t": 1712675242.1981175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 290.537}}, "t": 1712675241.464561}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 293.078}}, "t": 1712675241.9710739}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.082997941268562, "units": "Tflops", "t": 1712675243.3506382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 283.03}}, "t": 1712675242.4775994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.315}}, "t": 1712675242.9852397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.08823869384589, "units": "Tflops", "t": 1712675244.5028749}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 284.282}}, "t": 1712675243.49273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.671}}, "t": 1712675244.0002184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.08035298750689, "units": "Tflops", "t": 1712675245.6555555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 282.397}}, "t": 1712675244.5067284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 289.245}}, "t": 1712675245.0132718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 290.512}}, "t": 1712675245.522407}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.090490692244593, "units": "Tflops", "t": 1712675246.8076591}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 290.986}}, "t": 1712675246.0316002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 293.526}}, "t": 1712675246.5399573}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.087436794646, "units": "Tflops", "t": 1712675247.959917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 287.007}}, "t": 1712675247.047624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.121}}, "t": 1712675247.554147}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.084451004008496, "units": "Tflops", "t": 1712675249.1123502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 281.532}}, "t": 1712675248.060681}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 292.772}}, "t": 1712675248.5672162}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.719}}, "t": 1712675249.0747848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.087010195927874, "units": "Tflops", "t": 1712675250.264661}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 284.454}}, "t": 1712675249.5830886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 291.706}}, "t": 1712675250.089619}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.088183388289636, "units": "Tflops", "t": 1712675251.4168727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 290.538}}, "t": 1712675250.5997002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 289.355}}, "t": 1712675251.1066308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.08075165748376, "units": "Tflops", "t": 1712675252.5695307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 291.124}}, "t": 1712675251.6156964}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 286.419}}, "t": 1712675252.1263368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.077136606592433, "units": "Tflops", "t": 1712675253.7224312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 297.337}}, "t": 1712675252.6370149}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 298.331}}, "t": 1712675253.1447515}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.025}}, "t": 1712675253.6512473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.084676090583844, "units": "Tflops", "t": 1712675254.874887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 286.721}}, "t": 1712675254.1587784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 295.602}}, "t": 1712675254.6663527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.080783236015765, "units": "Tflops", "t": 1712675256.0275378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.693}}, "t": 1712675255.1739786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 292.183}}, "t": 1712675255.6805358}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.077010341367657, "units": "Tflops", "t": 1712675257.1804485}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 295.703}}, "t": 1712675256.188453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 293.958}}, "t": 1712675256.6949415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.070880597189905, "units": "Tflops", "t": 1712675258.3337245}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 287.013}}, "t": 1712675257.205016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 299.006}}, "t": 1712675257.7150216}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 282.912}}, "t": 1712675258.2215314}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.077191848153863, "units": "Tflops", "t": 1712675259.4866505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.4}}, "t": 1712675258.7309465}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 289.168}}, "t": 1712675259.2413368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.078963693625212, "units": "Tflops", "t": 1712675260.6394491}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 289.731}}, "t": 1712675259.7490027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 292.08}}, "t": 1712675260.2573454}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.082109628752338, "units": "Tflops", "t": 1712675261.7920232}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 294.503}}, "t": 1712675260.7648773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 299.281}}, "t": 1712675261.2714658}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 284.361}}, "t": 1712675261.7779684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.0850472975868, "units": "Tflops", "t": 1712675262.944488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.849}}, "t": 1712675262.2847114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 290.231}}, "t": 1712675262.7923906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.089898009570387, "units": "Tflops", "t": 1712675264.0965917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 293.648}}, "t": 1712675263.2988794}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 291.224}}, "t": 1712675263.8089595}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.084206179025056, "units": "Tflops", "t": 1712675265.249038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.747}}, "t": 1712675264.3212998}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.255}}, "t": 1712675264.8277955}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.08760664944328, "units": "Tflops", "t": 1712675266.4012728}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 299.789}}, "t": 1712675265.3368912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 287.217}}, "t": 1712675265.8475657}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 286.328}}, "t": 1712675266.355267}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.08985454762259, "units": "Tflops", "t": 1712675267.5534418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.212}}, "t": 1712675266.8627875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 286.522}}, "t": 1712675267.370316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.090727775618948, "units": "Tflops", "t": 1712675268.7055273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 294.306}}, "t": 1712675267.8779955}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.158}}, "t": 1712675268.3864846}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.09138373697215, "units": "Tflops", "t": 1712675269.8575704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 289.453}}, "t": 1712675268.8929868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.354}}, "t": 1712675269.3996968}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.096427451316274, "units": "Tflops", "t": 1712675271.0093422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.684}}, "t": 1712675269.9098146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.191}}, "t": 1712675270.4189768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.002}}, "t": 1712675270.928044}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.096075569737568, "units": "Tflops", "t": 1712675272.1611183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 286.339}}, "t": 1712675271.4388337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.46}}, "t": 1712675271.9455237}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.094751187923393, "units": "Tflops", "t": 1712675273.3129284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 284.775}}, "t": 1712675272.4534378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 297.276}}, "t": 1712675272.9599807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.094612830273014, "units": "Tflops", "t": 1712675274.4647458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.0}}, "t": 1712675273.4678526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.105}}, "t": 1712675273.974343}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.09485001604411, "units": "Tflops", "t": 1712675275.6165519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 289.559}}, "t": 1712675274.4826267}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.285}}, "t": 1712675274.9891214}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 291.307}}, "t": 1712675275.495752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.08270972414416, "units": "Tflops", "t": 1712675276.7691264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 285.458}}, "t": 1712675276.004943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 294.607}}, "t": 1712675276.5153596}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.058947977049133, "units": "Tflops", "t": 1712675277.9230993}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 311.485}}, "t": 1712675277.0258062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 297.025}}, "t": 1712675277.536139}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.04623577238689, "units": "Tflops", "t": 1712675279.07787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 291.395}}, "t": 1712675278.0478246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 307.166}}, "t": 1712675278.554341}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 281.075}}, "t": 1712675279.0608172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.034978197965646, "units": "Tflops", "t": 1712675280.2333796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 308.518}}, "t": 1712675279.5673966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.779}}, "t": 1712675280.0739477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.05740035322804, "units": "Tflops", "t": 1712675281.3874962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 276.876}}, "t": 1712675280.5816293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 307.202}}, "t": 1712675281.0881753}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.06590554303862, "units": "Tflops", "t": 1712675282.5410867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.713}}, "t": 1712675281.5956686}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 302.286}}, "t": 1712675282.105572}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07138534359112, "units": "Tflops", "t": 1712675283.694313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 277.331}}, "t": 1712675282.612851}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 311.951}}, "t": 1712675283.1202316}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 287.236}}, "t": 1712675283.6303914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.068432167672167, "units": "Tflops", "t": 1712675284.8477478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 304.075}}, "t": 1712675284.137978}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 281.434}}, "t": 1712675284.6491387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.02746222068485, "units": "Tflops", "t": 1712675286.0036285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 308.14}}, "t": 1712675285.1578302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 287.786}}, "t": 1712675285.6682196}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.042838230263854, "units": "Tflops", "t": 1712675287.1585772}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 309.507}}, "t": 1712675286.1759572}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 288.955}}, "t": 1712675286.6824594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.039555877603437, "units": "Tflops", "t": 1712675288.3137321}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.708}}, "t": 1712675287.1892283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 292.372}}, "t": 1712675287.695799}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 287.101}}, "t": 1712675288.2034175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.032919942085915, "units": "Tflops", "t": 1712675289.4693542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.337}}, "t": 1712675288.711628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 285.15}}, "t": 1712675289.218979}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.033638709873, "units": "Tflops", "t": 1712675290.6249042}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 279.469}}, "t": 1712675289.7268696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.6}}, "t": 1712675290.23461}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.012628338526106, "units": "Tflops", "t": 1712675291.7817204}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.9}}, "t": 1712675290.742835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 305.22}}, "t": 1712675291.2496915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 278.892}}, "t": 1712675291.7598436}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.041721710268604, "units": "Tflops", "t": 1712675292.9368026}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 289.453}}, "t": 1712675292.2663345}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.442}}, "t": 1712675292.775408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.025829439184108, "units": "Tflops", "t": 1712675294.0927904}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 285.331}}, "t": 1712675293.2845411}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.485}}, "t": 1712675293.7938776}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.034192551590788, "units": "Tflops", "t": 1712675295.2482653}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.278}}, "t": 1712675294.3004296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 283.998}}, "t": 1712675294.8084288}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.047997938229347, "units": "Tflops", "t": 1712675296.4028933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.696}}, "t": 1712675295.3159401}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 311.845}}, "t": 1712675295.822516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 278.807}}, "t": 1712675296.3315566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.035803196463473, "units": "Tflops", "t": 1712675297.5583093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 304.555}}, "t": 1712675296.8392494}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 282.22}}, "t": 1712675297.3457315}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.038616586139455, "units": "Tflops", "t": 1712675298.7135136}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 287.204}}, "t": 1712675297.8532495}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 309.89}}, "t": 1712675298.3620186}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.013286782628903, "units": "Tflops", "t": 1712675299.8702562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.852}}, "t": 1712675298.8716364}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 311.572}}, "t": 1712675299.3805044}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.014505808446273, "units": "Tflops", "t": 1712675301.0269265}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 282.492}}, "t": 1712675299.8888938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 288.789}}, "t": 1712675300.3963807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 310.663}}, "t": 1712675300.9028838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.037846357700523, "units": "Tflops", "t": 1712675302.1822085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 285.538}}, "t": 1712675301.4097843}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.504}}, "t": 1712675301.9162858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.013470998369556, "units": "Tflops", "t": 1712675303.3389423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 286.356}}, "t": 1712675302.4268312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 281.639}}, "t": 1712675302.936109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.063048623084136, "units": "Tflops", "t": 1712675304.4926727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 307.679}}, "t": 1712675303.4472642}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 279.09}}, "t": 1712675303.957145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 298.898}}, "t": 1712675304.4636195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.021026918430817, "units": "Tflops", "t": 1712675305.6489832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.363}}, "t": 1712675304.9702308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 291.198}}, "t": 1712675305.4772983}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.026791019367792, "units": "Tflops", "t": 1712675306.8049078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.337}}, "t": 1712675305.9862068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 301.448}}, "t": 1712675306.492666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.024966062246186, "units": "Tflops", "t": 1712675307.9609852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.515}}, "t": 1712675307.0002666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.96}}, "t": 1712675307.5091932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.010386835059602, "units": "Tflops", "t": 1712675309.1179066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 306.998}}, "t": 1712675308.016092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 279.032}}, "t": 1712675308.524487}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 290.637}}, "t": 1712675309.032237}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712675309.972953, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/llama.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/llama.D0.data new file mode 100644 index 000000000..1f49c860d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/llama.D0.data @@ -0,0 +1,630 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 30, "power": 42.126, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712674825.37422, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712674825.384195}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Pipeline\n", "pipe": "stderr"} +{"event": "line", "data": "Starting\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =10.088218688964844, total / elapsed =193.39390433061607 in_token_count =9 out_token_count =1942\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 193.39390433061607, "units": "Tok/s", "t": 1712674897.5574486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27230.375, 81920.0], "load": 0, "temperature": 31, "power": 61.906}}, "t": 1712674887.5536067}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27252.375, 81920.0], "load": 0, "temperature": 31, "power": 62.303}}, "t": 1712674888.061904}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27284.375, 81920.0], "load": 0.42, "temperature": 33, "power": 234.188}}, "t": 1712674888.5730636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27328.375, 81920.0], "load": 0.97, "temperature": 34, "power": 231.916}}, "t": 1712674889.0857933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27414.375, 81920.0], "load": 0.97, "temperature": 35, "power": 238.175}}, "t": 1712674889.5979338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27574.375, 81920.0], "load": 0.97, "temperature": 35, "power": 233.337}}, "t": 1712674890.1104434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27614.375, 81920.0], "load": 0.97, "temperature": 36, "power": 235.574}}, "t": 1712674890.6195536}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27674.375, 81920.0], "load": 0.97, "temperature": 36, "power": 231.973}}, "t": 1712674891.1305933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27714.375, 81920.0], "load": 0.96, "temperature": 36, "power": 237.209}}, "t": 1712674891.6389866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27754.375, 81920.0], "load": 0.96, "temperature": 37, "power": 238.382}}, "t": 1712674892.148412}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27814.375, 81920.0], "load": 0.97, "temperature": 37, "power": 226.917}}, "t": 1712674892.65669}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27914.375, 81920.0], "load": 0.97, "temperature": 37, "power": 237.813}}, "t": 1712674893.1673505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27914.375, 81920.0], "load": 0.96, "temperature": 37, "power": 238.834}}, "t": 1712674893.6757026}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.97, "temperature": 38, "power": 239.297}}, "t": 1712674894.1869938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.97, "temperature": 38, "power": 237.919}}, "t": 1712674894.6952918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.96, "temperature": 38, "power": 239.374}}, "t": 1712674895.2068613}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28234.375, 81920.0], "load": 0.96, "temperature": 38, "power": 238.545}}, "t": 1712674895.718112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 39, "power": 235.129}}, "t": 1712674896.225861}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 39, "power": 238.685}}, "t": 1712674896.735081}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 39, "power": 235.34}}, "t": 1712674897.2438748}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.300233602523804, total / elapsed =365.8329321705197 in_token_count =185 out_token_count =1754\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 365.8329321705197, "units": "Tok/s", "t": 1712674902.8577058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 42, "power": 299.225}}, "t": 1712674897.7542367}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 39, "power": 227.851}}, "t": 1712674898.2680998}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 39, "power": 227.938}}, "t": 1712674898.7756364}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 40, "power": 232.672}}, "t": 1712674899.2867415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 40, "power": 232.109}}, "t": 1712674899.7978373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 40, "power": 232.673}}, "t": 1712674900.305121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 40, "power": 231.126}}, "t": 1712674900.8164623}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 40, "power": 230.23}}, "t": 1712674901.324765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 41, "power": 222.768}}, "t": 1712674901.835798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 41, "power": 227.719}}, "t": 1712674902.3451033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 41, "power": 235.281}}, "t": 1712674902.8540745}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.746753692626953, total / elapsed =296.1424250871159 in_token_count =121 out_token_count =1877\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 296.1424250871159, "units": "Tok/s", "t": 1712674909.6044824}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 41, "power": 228.633}}, "t": 1712674903.3624384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 41, "power": 233.648}}, "t": 1712674903.8714359}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 41, "power": 234.224}}, "t": 1712674904.3838406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 42, "power": 228.666}}, "t": 1712674904.8967655}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 42, "power": 233.919}}, "t": 1712674905.4072886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 42, "power": 232.512}}, "t": 1712674905.917214}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 42, "power": 234.287}}, "t": 1712674906.4271526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 42, "power": 234.373}}, "t": 1712674906.9360297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 42, "power": 234.06}}, "t": 1712674907.4459856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 43, "power": 233.79}}, "t": 1712674907.9568195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 43, "power": 234.365}}, "t": 1712674908.4660063}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 43, "power": 231.791}}, "t": 1712674908.9751143}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 43, "power": 232.439}}, "t": 1712674909.4830854}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.609412670135498, total / elapsed =316.06439244429475 in_token_count =127 out_token_count =1962\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 316.06439244429475, "units": "Tok/s", "t": 1712674916.213914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 231.298}}, "t": 1712674909.991496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 43, "power": 235.376}}, "t": 1712674910.5021307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 44, "power": 236.759}}, "t": 1712674911.0150733}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 44, "power": 229.232}}, "t": 1712674911.5233247}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 44, "power": 234.069}}, "t": 1712674912.0345898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 44, "power": 232.705}}, "t": 1712674912.5437095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 234.458}}, "t": 1712674913.0545666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 233.24}}, "t": 1712674913.563869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 44, "power": 234.818}}, "t": 1712674914.0734682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 45, "power": 235.39}}, "t": 1712674914.581794}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 45, "power": 235.395}}, "t": 1712674915.0943575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 45, "power": 235.021}}, "t": 1712674915.606989}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 45, "power": 231.404}}, "t": 1712674916.114811}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.348150730133057, total / elapsed =207.63464946530368 in_token_count =6 out_token_count =1935\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 207.63464946530368, "units": "Tok/s", "t": 1712674925.5620825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 236.919}}, "t": 1712674916.6260123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 45, "power": 233.729}}, "t": 1712674917.1357439}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 230.996}}, "t": 1712674917.6468904}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 46, "power": 237.261}}, "t": 1712674918.1582327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 46, "power": 232.203}}, "t": 1712674918.667419}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 46, "power": 236.06}}, "t": 1712674919.1757197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 226.169}}, "t": 1712674919.6843314}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 46, "power": 233.003}}, "t": 1712674920.1937096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 46, "power": 238.936}}, "t": 1712674920.7028885}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 231.063}}, "t": 1712674921.2112327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 236.332}}, "t": 1712674921.721787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 235.082}}, "t": 1712674922.231615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 236.36}}, "t": 1712674922.741374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 238.892}}, "t": 1712674923.2491553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 47, "power": 240.938}}, "t": 1712674923.75836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 47, "power": 237.468}}, "t": 1712674924.2711601}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 231.56}}, "t": 1712674924.7795055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 48, "power": 243.593}}, "t": 1712674925.290795}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.659961700439453, total / elapsed =588.5307487620344 in_token_count =256 out_token_count =1898\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 588.5307487620344, "units": "Tok/s", "t": 1712674929.222061}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.95, "temperature": 51, "power": 294.128}}, "t": 1712674925.7992022}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 48, "power": 239.311}}, "t": 1712674926.3087401}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 48, "power": 242.122}}, "t": 1712674926.8164885}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 48, "power": 240.671}}, "t": 1712674927.3254228}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 48, "power": 240.676}}, "t": 1712674927.8354828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 49, "power": 238.912}}, "t": 1712674928.3448703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 49, "power": 236.341}}, "t": 1712674928.853163}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.7477903366088867, total / elapsed =1303.9321434982767 in_token_count =340 out_token_count =1939\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1303.9321434982767, "units": "Tok/s", "t": 1712674930.9698687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 51, "power": 299.675}}, "t": 1712674929.3599095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 49, "power": 239.131}}, "t": 1712674929.8709118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 49, "power": 237.886}}, "t": 1712674930.3792064}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 49, "power": 237.491}}, "t": 1712674930.8898714}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.330774784088135, total / elapsed =294.37543282383496 in_token_count =95 out_token_count =2063\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 294.37543282383496, "units": "Tok/s", "t": 1712674938.300663}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 49, "power": 239.422}}, "t": 1712674931.3998232}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 49, "power": 242.793}}, "t": 1712674931.9086494}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 50, "power": 236.855}}, "t": 1712674932.421314}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 50, "power": 241.804}}, "t": 1712674932.935365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 50, "power": 244.397}}, "t": 1712674933.4461107}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 50, "power": 236.51}}, "t": 1712674933.9598095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 50, "power": 241.763}}, "t": 1712674934.4711936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 50, "power": 241.17}}, "t": 1712674934.981534}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 50, "power": 239.465}}, "t": 1712674935.4924383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 50, "power": 241.574}}, "t": 1712674936.0042293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 51, "power": 241.873}}, "t": 1712674936.512591}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 51, "power": 243.053}}, "t": 1712674937.0214033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 51, "power": 241.316}}, "t": 1712674937.530053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 51, "power": 238.049}}, "t": 1712674938.0395706}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.372143745422363, total / elapsed =215.42563311474393 in_token_count =5 out_token_count =2014\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 215.42563311474393, "units": "Tok/s", "t": 1712674947.6728258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 51, "power": 241.593}}, "t": 1712674938.548478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 51, "power": 239.381}}, "t": 1712674939.0612903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 51, "power": 244.73}}, "t": 1712674939.572418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 51, "power": 240.995}}, "t": 1712674940.0836365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 51, "power": 236.957}}, "t": 1712674940.596458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 51, "power": 241.793}}, "t": 1712674941.1062074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 52, "power": 244.344}}, "t": 1712674941.615313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 52, "power": 237.815}}, "t": 1712674942.12656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 52, "power": 239.297}}, "t": 1712674942.6347382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 52, "power": 244.782}}, "t": 1712674943.1439848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 52, "power": 234.957}}, "t": 1712674943.6534386}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 52, "power": 236.414}}, "t": 1712674944.1633735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 52, "power": 242.873}}, "t": 1712674944.6746902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 52, "power": 243.109}}, "t": 1712674945.1872332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 52, "power": 242.13}}, "t": 1712674945.6955626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 52, "power": 241.476}}, "t": 1712674946.2088773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 238.92}}, "t": 1712674946.7177432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 237.751}}, "t": 1712674947.2289748}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.7360401153564453, total / elapsed =551.653605518999 in_token_count =253 out_token_count =1808\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 551.653605518999, "units": "Tok/s", "t": 1712674951.408886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 246.613}}, "t": 1712674947.7377527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 53, "power": 238.989}}, "t": 1712674948.246086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 243.462}}, "t": 1712674948.7560117}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 245.09}}, "t": 1712674949.263601}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 242.836}}, "t": 1712674949.7712765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 244.24}}, "t": 1712674950.2807612}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 53, "power": 244.475}}, "t": 1712674950.7903304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 53, "power": 235.362}}, "t": 1712674951.3014889}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.098356008529663, total / elapsed =679.3925534073587 in_token_count =282 out_token_count =1823\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 679.3925534073587, "units": "Tok/s", "t": 1712674954.5072587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.99, "temperature": 54, "power": 233.336}}, "t": 1712674951.8135908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 239.008}}, "t": 1712674952.324761}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 54, "power": 247.438}}, "t": 1712674952.8331275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 235.104}}, "t": 1712674953.3445466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 249.489}}, "t": 1712674953.8538673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 246.455}}, "t": 1712674954.3630867}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.668853759765625, total / elapsed =575.3840676753646 in_token_count =256 out_token_count =1855\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 575.3840676753646, "units": "Tok/s", "t": 1712674958.1761305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 1.0, "temperature": 55, "power": 226.246}}, "t": 1712674954.872915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 240.428}}, "t": 1712674955.381831}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 240.675}}, "t": 1712674955.8923314}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 243.15}}, "t": 1712674956.4005384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 54, "power": 241.892}}, "t": 1712674956.9145167}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 238.134}}, "t": 1712674957.4265013}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 247.653}}, "t": 1712674957.9381745}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.37063479423523, total / elapsed =210.65808702889538 in_token_count =5 out_token_count =1969\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 210.65808702889538, "units": "Tok/s", "t": 1712674967.546786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 55, "power": 248.176}}, "t": 1712674958.4494689}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 242.208}}, "t": 1712674958.9577622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 239.814}}, "t": 1712674959.4677083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 246.513}}, "t": 1712674959.97738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 241.036}}, "t": 1712674960.4869568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 247.292}}, "t": 1712674960.9957337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 55, "power": 243.146}}, "t": 1712674961.5050366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 245.874}}, "t": 1712674962.0144198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 244.502}}, "t": 1712674962.5232456}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 55, "power": 245.891}}, "t": 1712674963.035722}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 56, "power": 248.861}}, "t": 1712674963.5433488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 245.624}}, "t": 1712674964.0546694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 245.693}}, "t": 1712674964.564418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 246.28}}, "t": 1712674965.0756352}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 250.089}}, "t": 1712674965.5838938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 249.514}}, "t": 1712674966.0947747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 247.834}}, "t": 1712674966.6040912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 56, "power": 245.416}}, "t": 1712674967.113951}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.5308899879455566, total / elapsed =1377.6300169225503 in_token_count =349 out_token_count =1760\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1377.6300169225503, "units": "Tok/s", "t": 1712674969.0776908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.95, "temperature": 59, "power": 244.972}}, "t": 1712674967.6323435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.98, "temperature": 57, "power": 253.026}}, "t": 1712674968.140077}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 56, "power": 250.496}}, "t": 1712674968.6522603}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =2.9774746894836426, total / elapsed =774.4818144532774 in_token_count =287 out_token_count =2019\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 774.4818144532774, "units": "Tok/s", "t": 1712674972.055183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 317.304}}, "t": 1712674969.1628683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.99, "temperature": 57, "power": 251.5}}, "t": 1712674969.6733167}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 253.006}}, "t": 1712674970.1842108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 252.971}}, "t": 1712674970.6971352}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 250.447}}, "t": 1712674971.2070663}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 248.258}}, "t": 1712674971.716452}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.32282829284668, total / elapsed =221.82109710062528 in_token_count =7 out_token_count =2061\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 221.82109710062528, "units": "Tok/s", "t": 1712674981.3780327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 241.059}}, "t": 1712674972.2260616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 57, "power": 247.494}}, "t": 1712674972.7358878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 57, "power": 254.628}}, "t": 1712674973.2453034}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 57, "power": 251.162}}, "t": 1712674973.753646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 57, "power": 248.253}}, "t": 1712674974.2614384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 250.863}}, "t": 1712674974.7710638}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 57, "power": 249.577}}, "t": 1712674975.2839453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 57, "power": 252.135}}, "t": 1712674975.7923572}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 58, "power": 252.226}}, "t": 1712674976.3033166}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 253.876}}, "t": 1712674976.8115864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 247.916}}, "t": 1712674977.3223877}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 58, "power": 246.811}}, "t": 1712674977.8306756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 58, "power": 250.094}}, "t": 1712674978.3415852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 251.847}}, "t": 1712674978.8492336}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 252.705}}, "t": 1712674979.358207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 250.625}}, "t": 1712674979.8665676}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 247.141}}, "t": 1712674980.3788507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 248.416}}, "t": 1712674980.8885784}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.1961157321929932, total / elapsed =1840.9589814212957 in_token_count =363 out_token_count =1839\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1840.9589814212957, "units": "Tok/s", "t": 1712674982.5741606}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 58, "power": 254.94}}, "t": 1712674981.39825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 59, "power": 248.777}}, "t": 1712674981.9118578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 58, "power": 244.557}}, "t": 1712674982.420194}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.31668996810913, total / elapsed =212.0924928020374 in_token_count =7 out_token_count =1969\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 212.0924928020374, "units": "Tok/s", "t": 1712674991.8908696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 58, "power": 253.491}}, "t": 1712674982.9314048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 58, "power": 250.841}}, "t": 1712674983.4412286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 255.177}}, "t": 1712674983.950912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 252.729}}, "t": 1712674984.461828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 250.444}}, "t": 1712674984.9711747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 255.652}}, "t": 1712674985.4800587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 248.775}}, "t": 1712674985.9884262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 253.248}}, "t": 1712674986.5008078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 59, "power": 256.293}}, "t": 1712674987.0127246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 59, "power": 249.363}}, "t": 1712674987.5210547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 250.059}}, "t": 1712674988.0323136}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 59, "power": 253.09}}, "t": 1712674988.542066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 59, "power": 251.79}}, "t": 1712674989.0506465}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 59, "power": 251.621}}, "t": 1712674989.558235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 59, "power": 250.957}}, "t": 1712674990.068098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 59, "power": 252.606}}, "t": 1712674990.5763803}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 59, "power": 254.184}}, "t": 1712674991.0858123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 251.337}}, "t": 1712674991.5942142}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.6513347625732422, total / elapsed =1222.0417360168635 in_token_count =344 out_token_count =1674\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1222.0417360168635, "units": "Tok/s", "t": 1712674993.542219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 62, "power": 307.184}}, "t": 1712674992.101825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.98, "temperature": 60, "power": 253.884}}, "t": 1712674992.6101038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 250.701}}, "t": 1712674993.121374}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.7264015674591064, total / elapsed =308.4858938601594 in_token_count =122 out_token_count =1953\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 308.4858938601594, "units": "Tok/s", "t": 1712675000.268639}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 257.692}}, "t": 1712674993.629382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 60, "power": 249.296}}, "t": 1712674994.1389868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 255.344}}, "t": 1712674994.648569}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 245.497}}, "t": 1712674995.158134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 60, "power": 249.253}}, "t": 1712674995.6672168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 255.069}}, "t": 1712674996.176555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 256.767}}, "t": 1712674996.6866949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 255.208}}, "t": 1712674997.195025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 255.232}}, "t": 1712674997.7047045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 256.548}}, "t": 1712674998.2169533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 60, "power": 258.045}}, "t": 1712674998.7247121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 60, "power": 256.942}}, "t": 1712674999.2371867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 60, "power": 254.045}}, "t": 1712674999.745455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 61, "power": 253.158}}, "t": 1712675000.2554305}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.352882385253906, total / elapsed =212.34095738562903 in_token_count =6 out_token_count =1980\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 212.34095738562903, "units": "Tok/s", "t": 1712675009.6215398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 251.638}}, "t": 1712675000.7637887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 61, "power": 254.542}}, "t": 1712675001.274614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 249.332}}, "t": 1712675001.782868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 256.481}}, "t": 1712675002.2916303}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 249.291}}, "t": 1712675002.8003156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 255.904}}, "t": 1712675003.3130627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 61, "power": 252.383}}, "t": 1712675003.821772}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 257.489}}, "t": 1712675004.3324354}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 255.43}}, "t": 1712675004.8407397}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 61, "power": 257.124}}, "t": 1712675005.3514977}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 61, "power": 252.28}}, "t": 1712675005.8597753}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 253.815}}, "t": 1712675006.3696516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 248.589}}, "t": 1712675006.8774326}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 61, "power": 246.629}}, "t": 1712675007.3873744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 61, "power": 252.726}}, "t": 1712675007.8957472}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 61, "power": 249.819}}, "t": 1712675008.4052396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 61, "power": 257.946}}, "t": 1712675008.9135706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 257.629}}, "t": 1712675009.42525}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.429973602294922, total / elapsed =262.45046138490943 in_token_count =91 out_token_count =1859\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 262.45046138490943, "units": "Tok/s", "t": 1712675017.0515313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 218.42}}, "t": 1712675009.9328992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 253.005}}, "t": 1712675010.4441726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 256.919}}, "t": 1712675010.953755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 259.756}}, "t": 1712675011.46325}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 254.036}}, "t": 1712675011.971619}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 254.553}}, "t": 1712675012.4791017}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 258.548}}, "t": 1712675012.988468}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 253.094}}, "t": 1712675013.4972844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 246.66}}, "t": 1712675014.005531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 252.774}}, "t": 1712675014.5166457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 250.42}}, "t": 1712675015.0279894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 260.943}}, "t": 1712675015.5374403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 260.023}}, "t": 1712675016.048706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 258.702}}, "t": 1712675016.5583007}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.846822500228882, total / elapsed =363.10320689860043 in_token_count =162 out_token_count =1961\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 363.10320689860043, "units": "Tok/s", "t": 1712675022.898371}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 258.394}}, "t": 1712675017.0673466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 62, "power": 245.556}}, "t": 1712675017.5764785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 62, "power": 257.719}}, "t": 1712675018.086249}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 259.738}}, "t": 1712675018.5956419}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 259.264}}, "t": 1712675019.1050513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 260.705}}, "t": 1712675019.6179585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 253.03}}, "t": 1712675020.1303427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 255.605}}, "t": 1712675020.6411307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 251.565}}, "t": 1712675021.1541586}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 257.741}}, "t": 1712675021.6640193}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 258.461}}, "t": 1712675022.1746259}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 258.286}}, "t": 1712675022.683597}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.276852130889893, total / elapsed =398.34354798293674 in_token_count =186 out_token_count =1916\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 398.34354798293674, "units": "Tok/s", "t": 1712675028.1752403}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 226.629}}, "t": 1712675023.1940997}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 63, "power": 260.646}}, "t": 1712675023.7107244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 263.066}}, "t": 1712675024.2214506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 263.205}}, "t": 1712675024.731334}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 263.78}}, "t": 1712675025.240425}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 262.937}}, "t": 1712675025.7499554}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 264.257}}, "t": 1712675026.2597923}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 262.983}}, "t": 1712675026.7681055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 262.463}}, "t": 1712675027.2775378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 63, "power": 262.789}}, "t": 1712675027.7858844}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.8406524658203125, total / elapsed =302.01797420975265 in_token_count =117 out_token_count =1949\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 302.01797420975265, "units": "Tok/s", "t": 1712675035.0159092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 296.532}}, "t": 1712675028.2943618}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 263.241}}, "t": 1712675028.8067074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 252.228}}, "t": 1712675029.3179045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 259.28}}, "t": 1712675029.8283024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 264.051}}, "t": 1712675030.3415208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 262.847}}, "t": 1712675030.8497672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 248.836}}, "t": 1712675031.3597784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 259.614}}, "t": 1712675031.8673317}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 257.131}}, "t": 1712675032.3783095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 258.559}}, "t": 1712675032.8891478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 256.873}}, "t": 1712675033.3986301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 251.11}}, "t": 1712675033.9091673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 265.592}}, "t": 1712675034.4184783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 264.828}}, "t": 1712675034.9295542}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.344895839691162, total / elapsed =217.7659371393544 in_token_count =6 out_token_count =2029\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 217.7659371393544, "units": "Tok/s", "t": 1712675044.3608217}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 64, "power": 263.449}}, "t": 1712675035.4390495}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 256.372}}, "t": 1712675035.9502778}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 264.973}}, "t": 1712675036.4643517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 260.945}}, "t": 1712675036.977286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 264.731}}, "t": 1712675037.4874823}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 64, "power": 251.154}}, "t": 1712675037.996647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 263.993}}, "t": 1712675038.5084708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 64, "power": 259.838}}, "t": 1712675039.0168104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 258.414}}, "t": 1712675039.5271652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 264.68}}, "t": 1712675040.0354333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 262.702}}, "t": 1712675040.544769}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 266.033}}, "t": 1712675041.0543382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 253.011}}, "t": 1712675041.5637388}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 255.553}}, "t": 1712675042.0760114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 257.608}}, "t": 1712675042.5884407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 253.383}}, "t": 1712675043.1009042}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 257.001}}, "t": 1712675043.61198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 262.234}}, "t": 1712675044.1225696}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.430144548416138, total / elapsed =263.3865313450958 in_token_count =91 out_token_count =1866\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 263.3865313450958, "units": "Tok/s", "t": 1712675051.7909815}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 65, "power": 246.723}}, "t": 1712675044.6323917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 261.495}}, "t": 1712675045.1413755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 261.92}}, "t": 1712675045.6497002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 265.856}}, "t": 1712675046.1596513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 259.742}}, "t": 1712675046.668999}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 257.942}}, "t": 1712675047.1773307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 265.932}}, "t": 1712675047.6867998}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 267.051}}, "t": 1712675048.1951175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 258.829}}, "t": 1712675048.7046535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 259.563}}, "t": 1712675049.2143507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 265.662}}, "t": 1712675049.7263887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 65, "power": 267.599}}, "t": 1712675050.2345622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 65, "power": 266.557}}, "t": 1712675050.7445366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 266.106}}, "t": 1712675051.252897}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 262.69}}, "t": 1712675051.763607}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.272041082382202, total / elapsed =216.99645009371233 in_token_count =9 out_token_count =2003\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 216.99645009371233, "units": "Tok/s", "t": 1712675061.0630417}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 262.406}}, "t": 1712675052.2724504}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 266.236}}, "t": 1712675052.781405}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 261.954}}, "t": 1712675053.290823}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 258.625}}, "t": 1712675053.8004081}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 265.283}}, "t": 1712675054.3119082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 260.3}}, "t": 1712675054.8251052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 266.543}}, "t": 1712675055.3356369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 262.999}}, "t": 1712675055.843878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 256.687}}, "t": 1712675056.3548274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 262.685}}, "t": 1712675056.8662257}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 265.728}}, "t": 1712675057.3758738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 266.853}}, "t": 1712675057.885691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 265.789}}, "t": 1712675058.393052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 268.98}}, "t": 1712675058.903528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 267.973}}, "t": 1712675059.4118736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 266.346}}, "t": 1712675059.9214299}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 66, "power": 265.17}}, "t": 1712675060.432492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 265.444}}, "t": 1712675060.9431355}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.316366672515869, total / elapsed =577.7406991448506 in_token_count =273 out_token_count =1643\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 577.7406991448506, "units": "Tok/s", "t": 1712675064.379423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 67, "power": 253.913}}, "t": 1712675061.4545562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 261.541}}, "t": 1712675061.964431}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 259.926}}, "t": 1712675062.4735467}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 260.285}}, "t": 1712675062.9825377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 66, "power": 260.152}}, "t": 1712675063.4926436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 267.741}}, "t": 1712675064.0019991}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.413318157196045, total / elapsed =614.3582002688647 in_token_count =269 out_token_count =1828\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 614.3582002688647, "units": "Tok/s", "t": 1712675067.7927601}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 303.741}}, "t": 1712675064.5089052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 268.078}}, "t": 1712675065.0171704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 268.422}}, "t": 1712675065.5287101}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 270.359}}, "t": 1712675066.0411425}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 267.82}}, "t": 1712675066.5534296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 267.03}}, "t": 1712675067.0646274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 267.828}}, "t": 1712675067.577039}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.686685085296631, total / elapsed =396.2288837852365 in_token_count =213 out_token_count =1644\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 396.2288837852365, "units": "Tok/s", "t": 1712675072.4794614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 232.807}}, "t": 1712675068.088443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 266.034}}, "t": 1712675068.596738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 257.917}}, "t": 1712675069.1067238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 255.599}}, "t": 1712675069.6142762}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 257.97}}, "t": 1712675070.1236615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 260.924}}, "t": 1712675070.6324885}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 262.643}}, "t": 1712675071.143082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 261.741}}, "t": 1712675071.6524966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 268.789}}, "t": 1712675072.1663864}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.24223804473877, total / elapsed =228.62427799107004 in_token_count =11 out_token_count =2102\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 228.62427799107004, "units": "Tok/s", "t": 1712675081.7217162}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 67, "power": 250.555}}, "t": 1712675072.6759317}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 261.177}}, "t": 1712675073.184151}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 257.771}}, "t": 1712675073.6963964}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 266.77}}, "t": 1712675074.2055354}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 264.292}}, "t": 1712675074.7147727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 67, "power": 264.932}}, "t": 1712675075.2244427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 256.531}}, "t": 1712675075.7341862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 264.478}}, "t": 1712675076.2433681}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 268.674}}, "t": 1712675076.7517793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 258.901}}, "t": 1712675077.2606058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 67, "power": 262.599}}, "t": 1712675077.7690585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 265.349}}, "t": 1712675078.2773683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 67, "power": 266.299}}, "t": 1712675078.7879994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 263.263}}, "t": 1712675079.301156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 263.233}}, "t": 1712675079.8095293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 266.321}}, "t": 1712675080.3189383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 262.977}}, "t": 1712675080.826411}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 258.915}}, "t": 1712675081.3368847}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.1548848152160645, total / elapsed =322.02064855870464 in_token_count =148 out_token_count =1834\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 322.02064855870464, "units": "Tok/s", "t": 1712675087.8766198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 354.899}}, "t": 1712675081.8450155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 267.689}}, "t": 1712675082.3526046}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 266.55}}, "t": 1712675082.8619816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 264.586}}, "t": 1712675083.3737726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 268.411}}, "t": 1712675083.882101}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 268.898}}, "t": 1712675084.3938322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 267.384}}, "t": 1712675084.9021702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 269.489}}, "t": 1712675085.4133298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 268.305}}, "t": 1712675085.9208252}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 265.855}}, "t": 1712675086.4300153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 266.666}}, "t": 1712675086.9381437}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 266.433}}, "t": 1712675087.4471345}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.007462739944458, total / elapsed =297.25452382733755 in_token_count =110 out_token_count =1973\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 297.25452382733755, "units": "Tok/s", "t": 1712675094.8841016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 234.05}}, "t": 1712675087.9557092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 264.042}}, "t": 1712675088.4639797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 269.546}}, "t": 1712675088.9738815}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 264.248}}, "t": 1712675089.4871652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 265.607}}, "t": 1712675089.9992154}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 265.591}}, "t": 1712675090.5125432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 266.935}}, "t": 1712675091.0215867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 267.728}}, "t": 1712675091.5320568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 267.01}}, "t": 1712675092.0403078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 267.386}}, "t": 1712675092.5479422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 260.349}}, "t": 1712675093.0593917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 256.284}}, "t": 1712675093.567674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 271.075}}, "t": 1712675094.0782304}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 268.069}}, "t": 1712675094.5879452}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.340133428573608, total / elapsed =222.58782659783665 in_token_count =6 out_token_count =2073\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 222.58782659783665, "units": "Tok/s", "t": 1712675104.2242541}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 69, "power": 270.108}}, "t": 1712675095.0974474}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 265.646}}, "t": 1712675095.6071808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 261.379}}, "t": 1712675096.1182756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 270.548}}, "t": 1712675096.629496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 264.522}}, "t": 1712675097.1371286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 270.487}}, "t": 1712675097.6464043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 266.507}}, "t": 1712675098.1577837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 269.897}}, "t": 1712675098.6674483}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 260.871}}, "t": 1712675099.1778326}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 267.69}}, "t": 1712675099.6860876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 266.366}}, "t": 1712675100.1947997}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 269.585}}, "t": 1712675100.7031813}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 263.989}}, "t": 1712675101.2154505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 260.75}}, "t": 1712675101.72993}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 261.315}}, "t": 1712675102.239522}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 259.386}}, "t": 1712675102.7504382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 270.291}}, "t": 1712675103.263486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 272.904}}, "t": 1712675103.7764468}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.198261260986328, total / elapsed =637.8465777279478 in_token_count =278 out_token_count =1762\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 637.8465777279478, "units": "Tok/s", "t": 1712675107.4225307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 271.032}}, "t": 1712675104.2853668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 271.625}}, "t": 1712675104.7949405}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 273.391}}, "t": 1712675105.3024843}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 271.43}}, "t": 1712675105.8099208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 273.69}}, "t": 1712675106.3196812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 271.239}}, "t": 1712675106.8289952}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 267.762}}, "t": 1712675107.3391106}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.34328317642212, total / elapsed =216.62620748856108 in_token_count =6 out_token_count =2018\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 216.62620748856108, "units": "Tok/s", "t": 1712675116.7658322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 273.189}}, "t": 1712675107.847431}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 271.599}}, "t": 1712675108.3603537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 275.829}}, "t": 1712675108.8686001}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 69, "power": 271.14}}, "t": 1712675109.3786}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 69, "power": 266.905}}, "t": 1712675109.886135}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 273.393}}, "t": 1712675110.3967257}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 266.645}}, "t": 1712675110.9051373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 269.807}}, "t": 1712675111.4139905}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 269.173}}, "t": 1712675111.922331}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 269.517}}, "t": 1712675112.4299421}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 270.392}}, "t": 1712675112.941104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 272.123}}, "t": 1712675113.4538116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 271.227}}, "t": 1712675113.9620984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 270.657}}, "t": 1712675114.4704328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 269.493}}, "t": 1712675114.9791453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 267.18}}, "t": 1712675115.4882603}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 272.44}}, "t": 1712675115.995898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 268.208}}, "t": 1712675116.5035985}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.657278060913086, total / elapsed =1337.1322847169554 in_token_count =344 out_token_count =1872\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1337.1322847169554, "units": "Tok/s", "t": 1712675118.4231234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 73, "power": 308.899}}, "t": 1712675117.0110452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 270.753}}, "t": 1712675117.5209198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 270.472}}, "t": 1712675118.0321057}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.126893758773804, total / elapsed =276.8386995486037 in_token_count =105 out_token_count =1868\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 276.8386995486037, "units": "Tok/s", "t": 1712675125.5500343}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 305.327}}, "t": 1712675118.5415921}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 266.145}}, "t": 1712675119.0499163}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 274.202}}, "t": 1712675119.559235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 265.6}}, "t": 1712675120.0675502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 272.619}}, "t": 1712675120.577224}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 272.619}}, "t": 1712675121.0877984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 275.92}}, "t": 1712675121.5960968}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 266.414}}, "t": 1712675122.106842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 262.376}}, "t": 1712675122.6155283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 263.223}}, "t": 1712675123.1282315}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 275.006}}, "t": 1712675123.6365895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 276.095}}, "t": 1712675124.1487944}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 275.917}}, "t": 1712675124.657912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 276.113}}, "t": 1712675125.167293}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.102782487869263, total / elapsed =220.7017472599477 in_token_count =17 out_token_count =1992\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 220.7017472599477, "units": "Tok/s", "t": 1712675134.652834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 291.201}}, "t": 1712675125.6757104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 260.654}}, "t": 1712675126.1847363}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 273.983}}, "t": 1712675126.6927528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 264.532}}, "t": 1712675127.203078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 276.369}}, "t": 1712675127.7124991}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.055}}, "t": 1712675128.2258086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.17}}, "t": 1712675128.7341878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 271.074}}, "t": 1712675129.2443748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 272.415}}, "t": 1712675129.757249}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 271.532}}, "t": 1712675130.2663765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 275.084}}, "t": 1712675130.7763019}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 274.199}}, "t": 1712675131.2845726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 276.537}}, "t": 1712675131.7950573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 276.954}}, "t": 1712675132.304571}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 276.345}}, "t": 1712675132.814059}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 276.922}}, "t": 1712675133.3228757}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.712}}, "t": 1712675133.8354168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 271.542}}, "t": 1712675134.347619}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.62080979347229, total / elapsed =459.22686603497505 in_token_count =216 out_token_count =1906\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 459.22686603497505, "units": "Tok/s", "t": 1712675139.273659}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 299.783}}, "t": 1712675134.8647687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.558}}, "t": 1712675135.37597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 279.854}}, "t": 1712675135.8871684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 268.471}}, "t": 1712675136.398561}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 276.745}}, "t": 1712675136.9068375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 276.824}}, "t": 1712675137.4160597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 274.602}}, "t": 1712675137.9236534}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 275.87}}, "t": 1712675138.433874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 272.515}}, "t": 1712675138.9422107}, "pipe": "data"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712675140.1891625, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-1_3b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-1_3b-multinode.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-1_3b-multinode.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-1_3b.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-1_3b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-1_3b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-6_7b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-6_7b-multinode.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-6_7b-multinode.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-6_7b.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-6_7b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/opt-6_7b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/reformer.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/reformer.D0.data new file mode 100644 index 000000000..aaa969b48 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/reformer.D0.data @@ -0,0 +1,291 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 92.739, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676010.115155, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712676010.130953}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 5.8769121170043945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.848538398742676}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 70, "power": 323.938}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.812562465667725}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.789477348327637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.774750709533691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 70, "power": 294.02}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.76361608505249}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.755309104919434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.906962821250794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.750979423522949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 70, "power": 260.061}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.043027195252854, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.745698928833008}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.9243260471431, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.74074649810791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.02130212113955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7361650466918945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 71, "power": 302.471}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.19425230541794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.732023239135742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.12230194488751, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.729157447814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.153899581716885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.726192951202393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 71, "power": 303.604}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.10174057666296, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.72298002243042}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.128422898030735, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.71983528137207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 71, "power": 303.187}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.13616031282563, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.716894626617432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.09510884334246, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.714244365692139}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.07385775884862, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.711149215698242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 71, "power": 322.392}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.04404631201518, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.708614349365234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.07188405723557, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.705747604370117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.0181642048456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.703686237335205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 71, "power": 302.23}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.04744456817093, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.700289726257324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.03800350998764, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.698376655578613}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.058287003327, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.695657730102539}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 72, "power": 323.91}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.06342688847907, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.692750453948975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.046636181780315, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.689994812011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.06250233891226, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.687404155731201}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 72, "power": 238.704}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.10101520747751, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6849188804626465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.069953967934495, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.680624008178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.996513321101546, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.678560256958008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 71, "power": 302.418}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.89756260887945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.675793647766113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.98095796518622, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.67247200012207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 72, "power": 308.331}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.96808185765724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.668625831604004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.980385942250976, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.664340496063232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.05340290373408, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.66124963760376}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 72, "power": 277.625}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.99905827783671, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.656659126281738}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.00934099985225, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.652324676513672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.000547475352256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6477556228637695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 312.5}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.08476025319592, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.642111778259277}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.01837960233835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.637774467468262}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.031684551841764, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.630337238311768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 73, "power": 275.89}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.00352248155528, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6230597496032715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.98526173288751, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.616440773010254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.00637267811439, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.609414577484131}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 72, "power": 272.129}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.96536637168753, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.600627422332764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.99907047842901, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5910491943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 291.409}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.94914017544962, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.58207368850708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.95946778803844, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.574241638183594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.96780679283457, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.563775539398193}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 323.504}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.976085078282615, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.550521373748779}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.968645846886595, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.539132118225098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.0076789048124, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.524212837219238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 72, "power": 255.682}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.09746485739254, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.511083602905273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.06964665928443, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4988694190979}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.03562859438624, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.485479831695557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 74, "power": 307.145}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.07121482338936, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.472776889801025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.03399579723318, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.465888977050781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.070866875261736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.456325054168701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 303.374}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.03089501527971, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4465227127075195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.02409905333129, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.443489074707031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 303.483}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.00893156928944, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.430057525634766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.010549376935586, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.419443130493164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.033773067939926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.403115272521973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 290.327}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.97103168181741, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.3853583335876465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.01329910457481, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.374266147613525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.0011240265609, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.365095138549805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 74, "power": 310.48}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.96988594857176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 323.452}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712676085.2607641, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/regnet_y_128gf.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/regnet_y_128gf.D0.data new file mode 100644 index 000000000..4844d7b97 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/regnet_y_128gf.D0.data @@ -0,0 +1,408 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 65, "power": 87.932, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675638.402877, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712675638.418169}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 61, "power": 52.6}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3952.375, 81920.0], "load": 0.09, "temperature": 60, "power": 81.488}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00616455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94366455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92230224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 66, "power": 321.132}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02435302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96478271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 66, "power": 309.077}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 87.26074327888509, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05963134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.26960056710469, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03607177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 66, "power": 313.578}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.55586355850865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.15874845586949, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.239990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 66, "power": 289.046}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.21092417115871, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.17510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5086669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.65088484032862, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4044189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9764404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 67, "power": 336.422}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 77.00591756605924, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06768798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.1601589165492, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 81.17649953123869, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 66, "power": 296.388}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04644775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14105224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.48866938537043, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.45257568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.77803481770871, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.43798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08331298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 67, "power": 301.939}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.58643256864286, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04290771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.67682836671747, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.25390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.73405122650279, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 68, "power": 295.17}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16436767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.26544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.27334950014907, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1256103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.29714782324247, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 68, "power": 291.388}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.60947662900763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.14087222509944, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 69, "power": 294.633}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.76809961297405, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22601318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05743408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.05425546427176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06365966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.02664708638703, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 70, "power": 309.14}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.32281494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 65.29930753339123, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.38861083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.32041319957217, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0684814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 70, "power": 305.538}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.914486544162, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.20361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.1542391023974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15069580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.054931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 70, "power": 317.613}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.5849713038805, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.06052646409343, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.85846005650089, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 71, "power": 304.721}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1290283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.88521595953179, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7943115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 63.95658895359503, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00726318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 71, "power": 302.613}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 65.88316244662458, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90716552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01605224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.06200555338134, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 86.72715469235435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 70, "power": 303.637}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.34228627526218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.85315418870302, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84466552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 72, "power": 292.837}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89312744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.19295509198776, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9566650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.54997808036232, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 73, "power": 296.895}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1663818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.23816302530895, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2611083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05230712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.40844079115186, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1234130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.10307510593907, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 73, "power": 308.807}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.29310625940927, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.35395622369606, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 73, "power": 315.368}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.71947289959395, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96356201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.51599010567666, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.23365043774402, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.54}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.25041101318877, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.25673353468804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 74, "power": 318.95}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.0922776567834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02862548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8790283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.96028962552337, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13043212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.86338344578424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 74, "power": 291.095}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0330810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97674560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.9177459810072, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.665075516237, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 75, "power": 319.102}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04339599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.84129871014196, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.39404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.95051069072913, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.1555102846423, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.76}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09002685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.20389202174499, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0643310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.17945837354958, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 75, "power": 301.067}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.86949430801981, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99871826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 83.23426878448747, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 303.538}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98858642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.33822696151925, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.114501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 65.48962645391408, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 292.937}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712675730.8646464, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0.data new file mode 100644 index 000000000..edcf6b5c9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0.data @@ -0,0 +1,299 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.57, "memory": 0.010771942138671876}, "temperature": 61, "power": 83.841, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676306.282432, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712676306.2981288}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24026.375, 81920.0], "load": 1.0, "temperature": 61, "power": 290.815}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.451s, 74.18/s (3.451s, 74.18/s) LR: 1.000e-05 Data: 1.277 (1.277)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27210.375, 81920.0], "load": 1.0, "temperature": 62, "power": 304.63}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938846111297607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27210.375, 81920.0], "load": 0.94, "temperature": 62, "power": 288.541}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.359s, 712.97/s (0.582s, 440.12/s) LR: 1.000e-05 Data: 0.000 (0.096)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.135 (1.135) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.238 (0.248) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0/20240409-152510-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 711.7738160311249, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27694.375, 81920.0], "load": 0, "temperature": 58, "power": 76.618}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4722.375, 81920.0], "load": 0.63, "temperature": 58, "power": 76.648}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963786602020264}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.414s, 181.08/s (1.414s, 181.08/s) LR: 2.001e-02 Data: 1.049 (1.049)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.906628608703613}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.4346870820842, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27234.375, 81920.0], "load": 0.99, "temperature": 62, "power": 284.636}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9278669357299805}, "pipe": "data"} +{"event": "data", "data": {"rate": 629.0199596780514, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902290344238281}, "pipe": "data"} +{"event": "data", "data": {"rate": 629.0764270218066, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895358085632324}, "pipe": "data"} +{"event": "data", "data": {"rate": 632.4244421582246, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27234.375, 81920.0], "load": 0.99, "temperature": 62, "power": 301.224}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.977859020233154}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.978 (6.92) Time: 0.364s, 703.75/s (0.440s, 581.48/s) LR: 2.001e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 639.6796425712702, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.948 (0.948) Loss: 6.8481 (6.8481) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.225) Loss: 6.5615 (6.8343) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.1870)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0/20240409-152510-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 703.6818782650307, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27720.375, 81920.0], "load": 0.94, "temperature": 62, "power": 193.144}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27720.375, 81920.0], "load": 0, "temperature": 57, "power": 76.343}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832669258117676}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.833 (6.83) Time: 1.428s, 179.27/s (1.428s, 179.27/s) LR: 4.001e-02 Data: 1.065 (1.065)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 616.4292317715061, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8587846755981445}, "pipe": "data"} +{"event": "data", "data": {"rate": 670.108394570014, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900918960571289}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28204.375, 81920.0], "load": 1.0, "temperature": 61, "power": 143.908}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 657.1083025600659, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9411797523498535}, "pipe": "data"} +{"event": "data", "data": {"rate": 626.7502168121187, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97545051574707}, "pipe": "data"} +{"event": "data", "data": {"rate": 628.9964426377271, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28204.375, 81920.0], "load": 0.93, "temperature": 62, "power": 205.392}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949697494506836}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.950 (6.92) Time: 0.366s, 700.35/s (0.442s, 579.80/s) LR: 4.001e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 636.6558653246302, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.942 (0.942) Loss: 6.7928 (6.7928) Acc@1: 0.0000 ( 0.0000) Acc@5: 3.5156 ( 3.5156)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.224) Loss: 6.3893 (6.8026) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.0659)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.2038949979491, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28688.375, 81920.0], "load": 0.11, "temperature": 59, "power": 77.49}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28688.375, 81920.0], "load": 0, "temperature": 57, "power": 75.922}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85746955871582}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.857 (6.86) Time: 1.421s, 180.19/s (1.421s, 180.19/s) LR: 6.000e-02 Data: 1.058 (1.058)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 497.43071401255213, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.927488803863525}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.2409238287324, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958431720733643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29172.375, 81920.0], "load": 0.99, "temperature": 62, "power": 313.351}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.7027844952562, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987691402435303}, "pipe": "data"} +{"event": "data", "data": {"rate": 626.495810241154, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.062805652618408}, "pipe": "data"} +{"event": "data", "data": {"rate": 628.6159449169451, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0275468826293945}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.028 (6.96) Time: 0.364s, 704.18/s (0.442s, 579.48/s) LR: 6.000e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29172.375, 81920.0], "load": 0.99, "temperature": 63, "power": 309.321}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.935 (0.935) Loss: 6.9124 (6.9124) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.222) Loss: 6.4215 (6.8218) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0/20240409-152510-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 702.7821567069418, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29656.375, 81920.0], "load": 0.92, "temperature": 63, "power": 308.233}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8767218589782715}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.877 (6.88) Time: 1.355s, 188.93/s (1.355s, 188.93/s) LR: 8.000e-02 Data: 0.990 (0.990)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29992.375, 81920.0], "load": 0.3, "temperature": 62, "power": 310.233}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.5729710410792, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93961763381958}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.9668582513825, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.977311611175537}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.8867229690081, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30140.375, 81920.0], "load": 0.93, "temperature": 63, "power": 311.737}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.044893264770508}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.6537879456525, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.071674823760986}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.5849497823561, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043305397033691}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.043 (7.00) Time: 0.364s, 703.19/s (0.438s, 584.17/s) LR: 8.000e-02 Data: 0.000 (0.073)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.932 (0.932) Loss: 6.8127 (6.8127) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.221) Loss: 6.4574 (6.8443) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0/20240409-152510-resnet152-224/checkpoint-4.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.9765547453741, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30140.375, 81920.0], "load": 0.56, "temperature": 59, "power": 78.236}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30624.375, 81920.0], "load": 0, "temperature": 61, "power": 310.263}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87186861038208}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.872 (6.87) Time: 1.374s, 186.32/s (1.374s, 186.32/s) LR: 9.993e-02 Data: 1.010 (1.010)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.8515071683322, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31108.375, 81920.0], "load": 0.99, "temperature": 62, "power": 252.535}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973188400268555}, "pipe": "data"} +{"event": "data", "data": {"rate": 599.6109913608716, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078207015991211}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.5657753735211, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.114728927612305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31108.375, 81920.0], "load": 0.99, "temperature": 62, "power": 297.126}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.4693825722436, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1046366691589355}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.7226909794998, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.109697341918945}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.110 (7.04) Time: 0.365s, 701.34/s (0.439s, 583.05/s) LR: 9.993e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.928 (0.928) Loss: 6.9445 (6.9445) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.224) Loss: 6.2512 (6.8818) Acc@1: 0.0000 ( 0.2422) Acc@5: 3.1250 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.3575966216138, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31108.375, 81920.0], "load": 0, "temperature": 59, "power": 77.099}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31592.375, 81920.0], "load": 0.94, "temperature": 63, "power": 279.632}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901736736297607}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.902 (6.90) Time: 1.394s, 183.65/s (1.394s, 183.65/s) LR: 9.990e-02 Data: 1.030 (1.030)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 627.6216257896866, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.024867057800293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32076.375, 81920.0], "load": 0.92, "temperature": 62, "power": 291.114}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 699.9231672102211, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98797607421875}, "pipe": "data"} +{"event": "data", "data": {"rate": 601.1648273019006, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.009696960449219}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.8884873069062, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32076.375, 81920.0], "load": 0.93, "temperature": 63, "power": 284.883}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.109463691711426}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.162873348109, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162814140319824}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.163 (7.05) Time: 0.364s, 704.02/s (0.440s, 581.19/s) LR: 9.990e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.947 (0.947) Loss: 6.8566 (6.8566) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.229) Loss: 6.4708 (6.8683) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1143)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 702.8978441724995, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32560.375, 81920.0], "load": 0.95, "temperature": 63, "power": 121.375}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32560.375, 81920.0], "load": 0.03, "temperature": 59, "power": 78.572}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.886444091796875}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.886 (6.89) Time: 1.373s, 186.42/s (1.373s, 186.42/s) LR: 9.987e-02 Data: 1.009 (1.009)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.944457530975342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33044.375, 81920.0], "load": 0.92, "temperature": 61, "power": 291.475}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.9765895293184, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029455184936523}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.3555307501415, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.085527420043945}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.3916189237323, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33044.375, 81920.0], "load": 0.94, "temperature": 62, "power": 294.729}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142531394958496}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.0275283136466, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084604263305664}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.085 (7.03) Time: 0.366s, 699.29/s (0.439s, 582.52/s) LR: 9.987e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 632.8035080220513, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.926 (0.926) Loss: 6.9009 (6.9009) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.224) Loss: 6.6355 (6.8677) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 0.8963)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.9210351916024, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.92, "temperature": 61, "power": 326.004}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.03, "temperature": 59, "power": 77.148}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.892055988311768}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.892 (6.89) Time: 1.358s, 188.54/s (1.358s, 188.54/s) LR: 9.982e-02 Data: 0.993 (0.993)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 569.8700252467113, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9829583168029785}, "pipe": "data"} +{"event": "data", "data": {"rate": 675.0773249653843, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34012.375, 81920.0], "load": 0.92, "temperature": 62, "power": 315.378}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.008730888366699}, "pipe": "data"} +{"event": "data", "data": {"rate": 698.6308205887834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034060955047607}, "pipe": "data"} +{"event": "data", "data": {"rate": 605.0436063967331, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016059875488281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34012.375, 81920.0], "load": 0.94, "temperature": 63, "power": 295.538}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 626.8758013582733, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1224870681762695}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.122 (7.01) Time: 0.365s, 701.21/s (0.439s, 583.46/s) LR: 9.982e-02 Data: 0.000 (0.073)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 633.224888404255, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.951 (0.951) Loss: 6.8927 (6.8927) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.223) Loss: 6.6890 (6.8626) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 0.8236)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.0663128246233, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34496.375, 81920.0], "load": 0.18, "temperature": 61, "power": 316.103}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34496.375, 81920.0], "load": 0, "temperature": 58, "power": 76.46}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9105730056762695}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.911 (6.91) Time: 1.364s, 187.64/s (1.364s, 187.64/s) LR: 9.978e-02 Data: 1.000 (1.000)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 541.6544570487671, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894057273864746}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.1072683698735, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34980.375, 81920.0], "load": 0.99, "temperature": 63, "power": 315.179}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.012039661407471}, "pipe": "data"} +{"event": "data", "data": {"rate": 664.5500999803511, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978803634643555}, "pipe": "data"} +{"event": "data", "data": {"rate": 653.860249220392, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094475746154785}, "pipe": "data"} +{"event": "data", "data": {"rate": 628.1924183511015, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34980.375, 81920.0], "load": 0.93, "temperature": 63, "power": 318.823}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032122611999512}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 7.032 (6.98) Time: 0.366s, 699.57/s (0.439s, 582.97/s) LR: 9.978e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 631.7845173731192, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.930 (0.930) Loss: 6.8367 (6.8367) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.223) Loss: 6.4412 (6.8317) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.6778005593355, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35464.375, 81920.0], "load": 0.94, "temperature": 63, "power": 116.691}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35464.375, 81920.0], "load": 0, "temperature": 58, "power": 76.401}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.844865798950195}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.845 (6.84) Time: 1.401s, 182.70/s (1.401s, 182.70/s) LR: 9.973e-02 Data: 1.037 (1.037)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 670.127501756768, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.920116424560547}, "pipe": "data"} +{"event": "data", "data": {"rate": 700.1513936993932, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907088279724121}, "pipe": "data"} +{"event": "data", "data": {"rate": 601.2379538790672, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712676433.7593145, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet152.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet152.D0.data new file mode 100644 index 000000000..719d338d9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet152.D0.data @@ -0,0 +1,296 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 90.715, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676173.605796, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712676173.6215441}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.380s, 75.73/s (3.380s, 75.73/s) LR: 1.000e-05 Data: 1.202 (1.202)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24484.375, 81920.0], "load": 1.0, "temperature": 67, "power": 152.525}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27230.375, 81920.0], "load": 1.0, "temperature": 67, "power": 294.555}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938846111297607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27230.375, 81920.0], "load": 0.93, "temperature": 68, "power": 279.284}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.359s, 712.12/s (0.579s, 442.33/s) LR: 1.000e-05 Data: 0.000 (0.091)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.135 (1.135) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.236 (0.247) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152.D0/20240409-152258-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 710.4365621203639, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27714.375, 81920.0], "load": 0, "temperature": 63, "power": 82.247}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4958.375, 81920.0], "load": 0.26, "temperature": 63, "power": 81.924}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963780879974365}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.337s, 191.48/s (1.337s, 191.48/s) LR: 2.001e-02 Data: 0.972 (0.972)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.906675338745117}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.4795952552595, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27210.375, 81920.0], "load": 0.93, "temperature": 67, "power": 305.656}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9277119636535645}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.3320522782624, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902412414550781}, "pipe": "data"} +{"event": "data", "data": {"rate": 626.7012603368722, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895235061645508}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.3861038508576, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27210.375, 81920.0], "load": 0.93, "temperature": 67, "power": 248.264}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.977532386779785}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.978 (6.92) Time: 0.366s, 699.34/s (0.437s, 586.11/s) LR: 2.001e-02 Data: 0.000 (0.072)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 630.5009343962276, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.986 (0.986) Loss: 6.8463 (6.8463) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.227) Loss: 6.5618 (6.8343) Acc@1: 21.8750 ( 0.2665) Acc@5: 28.1250 ( 1.2355)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152.D0/20240409-152258-resnet152-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.2722760831838, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27694.375, 81920.0], "load": 0.58, "temperature": 67, "power": 304.141}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27694.375, 81920.0], "load": 0, "temperature": 62, "power": 80.313}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832426071166992}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.832 (6.83) Time: 1.381s, 185.31/s (1.381s, 185.31/s) LR: 4.001e-02 Data: 1.017 (1.017)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 565.9094811072955, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858511924743652}, "pipe": "data"} +{"event": "data", "data": {"rate": 690.2198129422613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896279335021973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28178.375, 81920.0], "load": 0.99, "temperature": 66, "power": 276.712}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.957653004836, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947450637817383}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.8860288511853, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976815223693848}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.4410571862867, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28178.375, 81920.0], "load": 0.99, "temperature": 67, "power": 297.266}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936852931976318}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.937 (6.92) Time: 0.366s, 699.91/s (0.440s, 581.23/s) LR: 4.001e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 630.944057834751, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.938 (0.938) Loss: 6.8009 (6.8009) Acc@1: 0.0000 ( 0.0000) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.225) Loss: 6.3800 (6.8036) Acc@1: 0.0000 ( 0.2422) Acc@5: 9.3750 ( 1.1628)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.1051470417714, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28662.375, 81920.0], "load": 0.3, "temperature": 62, "power": 81.314}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28662.375, 81920.0], "load": 0, "temperature": 61, "power": 79.248}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8638081550598145}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.864 (6.86) Time: 1.363s, 187.76/s (1.363s, 187.76/s) LR: 6.000e-02 Data: 0.998 (0.998)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 517.7494711917236, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915842056274414}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.7150601048014, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961362838745117}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.8108184386938, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29146.375, 81920.0], "load": 0.93, "temperature": 66, "power": 287.301}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992908477783203}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.4462150543347, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.058210849761963}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.8912185675637, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029411315917969}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.029 (6.96) Time: 0.366s, 699.18/s (0.440s, 582.44/s) LR: 6.000e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.931 (0.931) Loss: 6.9279 (6.9279) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.226) Loss: 6.3622 (6.8220) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.0901)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29146.375, 81920.0], "load": 0.98, "temperature": 66, "power": 302.106}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 698.9558711425851, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29630.375, 81920.0], "load": 0.9, "temperature": 65, "power": 120.711}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872838020324707}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.873 (6.87) Time: 1.380s, 185.53/s (1.380s, 185.53/s) LR: 8.000e-02 Data: 1.014 (1.014)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30114.375, 81920.0], "load": 0.99, "temperature": 65, "power": 258.058}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 697.0237120169861, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944968223571777}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.7690324775955, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.983036994934082}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.735146463103, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.039030075073242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30114.375, 81920.0], "load": 0.99, "temperature": 66, "power": 292.367}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.6324670776564, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.075578212738037}, "pipe": "data"} +{"event": "data", "data": {"rate": 626.0976253613834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048366546630859}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.048 (7.00) Time: 0.366s, 699.18/s (0.440s, 581.62/s) LR: 8.000e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.945 (0.945) Loss: 6.8227 (6.8227) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.226) Loss: 6.5076 (6.8515) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 1.0417)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.7006979244784, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30114.375, 81920.0], "load": 0, "temperature": 62, "power": 80.118}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30598.375, 81920.0], "load": 0.07, "temperature": 65, "power": 194.643}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8834638595581055}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.883 (6.88) Time: 1.368s, 187.19/s (1.368s, 187.19/s) LR: 9.993e-02 Data: 1.003 (1.003)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 650.8112554237333, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31082.375, 81920.0], "load": 0.99, "temperature": 64, "power": 242.676}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976649761199951}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.3267233403061, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.082053184509277}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.0111477614954, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1162919998168945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31082.375, 81920.0], "load": 0.99, "temperature": 65, "power": 237.822}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.1966433729496, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09298038482666}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.1780765414262, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087071418762207}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.087 (7.04) Time: 0.364s, 702.34/s (0.439s, 583.29/s) LR: 9.993e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.947 (0.947) Loss: 6.9135 (6.9135) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.223) Loss: 6.3655 (6.8680) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 0.9690)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.3585785064511, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31418.375, 81920.0], "load": 0.3, "temperature": 65, "power": 316.669}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31566.375, 81920.0], "load": 0.94, "temperature": 63, "power": 79.878}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909605979919434}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.910 (6.91) Time: 1.371s, 186.76/s (1.371s, 186.76/s) LR: 9.990e-02 Data: 1.006 (1.006)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 676.2595278231291, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013334274291992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32050.375, 81920.0], "load": 0.99, "temperature": 65, "power": 299.358}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.647303183582, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994913101196289}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.8393031453288, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028815746307373}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.769540982827, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32050.375, 81920.0], "load": 0.94, "temperature": 66, "power": 302.806}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.095856666564941}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.2719321903731, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141505718231201}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.142 (7.04) Time: 0.365s, 700.54/s (0.440s, 582.13/s) LR: 9.990e-02 Data: 0.000 (0.074)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.937 (0.937) Loss: 6.8027 (6.8027) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.221) Loss: 6.5597 (6.8552) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.0901)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.5090774593166, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32534.375, 81920.0], "load": 0.95, "temperature": 62, "power": 79.533}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32534.375, 81920.0], "load": 0.03, "temperature": 61, "power": 79.324}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8752217292785645}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.875 (6.88) Time: 1.388s, 184.43/s (1.388s, 184.43/s) LR: 9.987e-02 Data: 1.024 (1.024)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.1467412816206, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.959608554840088}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.5387460672235, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33018.375, 81920.0], "load": 0.92, "temperature": 65, "power": 291.263}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018782615661621}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.0064678322078, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.072922706604004}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.9803065965423, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12181282043457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33018.375, 81920.0], "load": 0.94, "temperature": 65, "power": 287.88}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.1863810780986, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077692985534668}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.078 (7.02) Time: 0.366s, 700.27/s (0.441s, 580.54/s) LR: 9.987e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.940 (0.940) Loss: 6.8366 (6.8366) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.224) Loss: 6.6423 (6.8614) Acc@1: 0.0000 ( 0.1453) Acc@5: 0.0000 ( 0.7752)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.4246849971308, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33502.375, 81920.0], "load": 0.9, "temperature": 64, "power": 321.427}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33502.375, 81920.0], "load": 0, "temperature": 60, "power": 77.989}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8963751792907715}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.896 (6.90) Time: 1.380s, 185.56/s (1.380s, 185.56/s) LR: 9.982e-02 Data: 1.014 (1.014)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 692.0092304732472, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.960799694061279}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.0879633688103, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989874362945557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33986.375, 81920.0], "load": 0.99, "temperature": 64, "power": 165.343}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.200978163616, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0238494873046875}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.6348971409851, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.999516010284424}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.224270713825, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33986.375, 81920.0], "load": 0.99, "temperature": 64, "power": 311.335}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.085169792175293}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.085 (6.99) Time: 0.366s, 699.80/s (0.441s, 581.11/s) LR: 9.982e-02 Data: 0.000 (0.075)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.933 (0.933) Loss: 6.8261 (6.8261) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.223) Loss: 6.6501 (6.8468) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.7289863991764, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34470.375, 81920.0], "load": 0.61, "temperature": 61, "power": 79.629}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34470.375, 81920.0], "load": 0, "temperature": 60, "power": 77.79}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926488399505615}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.926 (6.93) Time: 1.411s, 181.46/s (1.411s, 181.46/s) LR: 9.978e-02 Data: 1.046 (1.046)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 601.5188892380388, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.867607593536377}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.9306052047795, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968344688415527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34954.375, 81920.0], "load": 0.99, "temperature": 64, "power": 261.215}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.65290272255, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945240020751953}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.1977069570205, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0675201416015625}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.7928609154417, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019091606140137}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34954.375, 81920.0], "load": 0.99, "temperature": 64, "power": 305.647}}}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 7.019 (6.96) Time: 0.365s, 701.69/s (0.442s, 578.53/s) LR: 9.978e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.941 (0.941) Loss: 6.8005 (6.8005) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.224) Loss: 6.4733 (6.8217) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.8685231182673, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35438.375, 81920.0], "load": 0.9, "temperature": 65, "power": 306.958}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83759880065918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35774.375, 81920.0], "load": 0, "temperature": 59, "power": 77.539}}}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.838 (6.84) Time: 1.459s, 175.45/s (1.459s, 175.45/s) LR: 9.973e-02 Data: 1.094 (1.094)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 546.0286309600742, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901539325714111}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.1537967847489, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897953987121582}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.4603825706232, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35922.375, 81920.0], "load": 0.99, "temperature": 64, "power": 274.609}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909365653991699}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.725253073353, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961158275604248}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.7774000214464, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/suvojova.2024-04-09_15:00:22.247229/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712676303.66606, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet50.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet50.D0.data new file mode 100644 index 000000000..5cd24a1c7 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/resnet50.D0.data @@ -0,0 +1,2211 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 94.697, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675312.685013, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712675312.7011843}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0189208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07318115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0589599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06268310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1090087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.185791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99810791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03021240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0460205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06439208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 71, "power": 277.54}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01129150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03814697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12310791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9730224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06427001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12481689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0045166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0550537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06060791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04132080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97540283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0970458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90789794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88079833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90875244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 71, "power": 271.497}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84259033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95147705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.760498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0108642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94647216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90313720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92645263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1085.3759556768414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92181396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94342041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.001708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9281005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9161376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05157470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.7255447644193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95526123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.929931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14642333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92083740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.83, "temperature": 72, "power": 190.008}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9735107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01495361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0101318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1111.8698725956037, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8753662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86322021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7818603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01275634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81646728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 778.9542279469581, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88751220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99383544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92449951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97406005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02996826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1083.2496858147788, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86309814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 71, "power": 195.376}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9158935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9371337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1076.769046211081, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96405029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00738525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96612548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95794677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1098.9189970075502, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9923095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85980224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79498291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84417724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 728.3570228082979, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83636474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87982177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78558349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8668212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.77, "temperature": 71, "power": 210.497}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84478759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1072.7951266246216, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03179931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91180419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0032958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9293212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93719482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91351318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9639892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0362548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1072.013961641833, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0242919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9608154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9676513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92315673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85491943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8707275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1080.9251954282454, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05633544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75518798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.22, "temperature": 68, "power": 86.998}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8743896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75701904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 736.5717672716795, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76666259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8651123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90704345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87725830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8424072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84649658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78570556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8453369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.8055190615798, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88116455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7698974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0291748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9876708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91876220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0474853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1057.2982526372875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9068603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87408447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93695068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 71, "power": 249.395}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90765380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03533935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1076.6749653071925, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91595458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00433349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10699462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1117.1144434018217, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83807373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78363037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9781494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87750244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 848.8789412899189, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01373291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 70, "power": 171.435}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9207763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.8631958044957, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01202392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9251708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91827392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90899658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9385986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1071.9372761104933, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85308837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95916748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02166748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1113.4107548587012, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.766845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7198486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.73, "temperature": 71, "power": 275.955}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85162353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88653564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 799.6910877910575, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93023681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98834228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07659912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91363525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.4085714401435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96173095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88519287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05010986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88616943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92608642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1057.528206702395, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07244873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8892822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0374755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.81, "temperature": 71, "power": 294.816}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01385498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1108.3007887716612, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88031005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7603759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7813720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98284912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 772.6261640578944, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8736572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85198974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04522705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9031982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96685791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.7226348251165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87628173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8714599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.81, "temperature": 71, "power": 297.268}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9698486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96661376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95611572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95672607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1091.6298866073166, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97576904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10626220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02752685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02606201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.869873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1095.9907381095472, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7694091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 713.73767191185, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95062255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0047607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9041748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.77, "temperature": 71, "power": 277.264}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84368896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97747802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87054443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93328857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1084.955162303319, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87518310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95941162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90655517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91143798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0428466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96258544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0184326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1088.9945514990202, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92169189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9564208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0125732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79156494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89605712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95782470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92376708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09295654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1096.2027216317651, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [9, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.77, "temperature": 68, "power": 86.171}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.6580810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8775634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86627197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77252197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 752.3909211240019, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.939697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94830322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7374267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86505126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9329833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.937255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1076.918319940576, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03314208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.873779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0120849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1053.7424422635656, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97467041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88360595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 71, "power": 239.414}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97418212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98223876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1077.368532706099, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01593017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97027587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.952880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9888916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [10, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7283935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 773.003595379829, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8455810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73211669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75860595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91790771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87445068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86419677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8148193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90692138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98175048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1088.9254014739345, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92071533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85479736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02642822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.77, "temperature": 71, "power": 270.099}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1079.8130185852262, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92889404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85137939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88055419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0123291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9727783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1079.2670587889902, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87127685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [11, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1113.4565528912353, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7322998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84112548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95172119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.75, "temperature": 70, "power": 310.955}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9088134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83197021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 830.7934513540451, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85174560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9654541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9080810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88189697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87200927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82464599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.4707208055922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02545166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94940185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8238525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90972900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9095458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05096435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9444580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1088.4751694813367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99859619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 71, "power": 287.909}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90521240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91668701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02362060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [12, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82745361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1103.681873302053, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83087158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82574462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.767822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94927978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8480224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 782.8354225000542, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87884521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88458251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88140869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94097900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9276123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91339111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8748779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1083.1547002499683, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84796142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.75, "temperature": 71, "power": 307.604}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06488037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1089.5069668142467, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87677001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0169677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00152587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96636962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89569091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06988525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1089.7705043680855, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [13, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8389892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 733.1100569525598, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89910888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 70, "power": 256.212}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01068115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86041259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.851318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98089599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82415771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0321044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8673095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1091.1089895882978, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80596923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88262939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9637451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95635986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1086.8238490120139, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90606689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9393310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95416259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89556884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93426513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1091.234613634574, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99908447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [14, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.46, "temperature": 67, "power": 86.369}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83612060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84588623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 730.284490589724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00128173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97076416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77191162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1079.1741362031828, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8599853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9468994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01458740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83660888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8387451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9302978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.882933601948, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97637939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86175537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 70, "power": 231.108}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95196533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9525146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79437255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1094.181466588985, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87786865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [15, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77789306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 771.357357892204, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89959716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72613525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89373779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.822998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75787353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83685302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80035400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9439697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93365478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8887939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01885986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1080.9997697907659, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02154541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95733642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9488525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.79, "temperature": 70, "power": 211.456}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91619873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00286865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82684326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92095947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93646240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1061.5288625488931, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8726806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0084228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97686767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93267822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88104248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0472412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98406982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.7159202132439, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 70, "power": 219.369}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712675383.191861, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/rwkv.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/rwkv.D0.data new file mode 100644 index 000000000..7fda6c872 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/rwkv.D0.data @@ -0,0 +1,424 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 37, "power": 67.057, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712677208.025084, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712677208.0413706}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "[2024-04-09 15:40:10,162] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"} +{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-09-15-40-11', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"} +{"event": "line", "data": "Building token list...\n", "pipe": "stderr"} +{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"} +{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "ninja: no work to do.\n", "pipe": "stdout"} +{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"} +{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"} +{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"} +{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"} +{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"} +{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1574.375, 81920.0], "load": 0.17, "temperature": 37, "power": 63.708}}}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 36, "power": 43.596}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1418.375, 81920.0], "load": 0, "temperature": 37, "power": 62.405}}}, "pipe": "data"} +{"event": "error", "data": {"type": "Exception", "message": ">- DeepSpeed Op Builder: Installed CUDA version 12.2 does not match the version torch was compiled with 11.8, unable to compile cuda/cpp extensions without a matching cuda version."}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"} +{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"} +{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"} +{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"} +{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"} +{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"} +{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"} +{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 467, in jit_load\n", "pipe": "stderr"} +{"event": "line", "data": " assert_no_cuda_mismatch(self.name)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 96, in assert_no_cuda_mismatch\n", "pipe": "stderr"} +{"event": "line", "data": " raise Exception(f\">- DeepSpeed Op Builder: Installed CUDA version {sys_cuda_version} does not match the \"\n", "pipe": "stderr"} +{"event": "line", "data": "Exception: >- DeepSpeed Op Builder: Installed CUDA version 12.2 does not match the version torch was compiled with 11.8, unable to compile cuda/cpp extensions without a matching cuda version.\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712677217.232635, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/stargan.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/stargan.D0.data new file mode 100644 index 000000000..e004524d5 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/stargan.D0.data @@ -0,0 +1,683 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.46, "memory": 0.010771942138671876}, "temperature": 66, "power": 88.855, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676776.24434, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712676776.2599926}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.09135627746582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [53428.375, 81920.0], "load": 1.0, "temperature": 63, "power": 84.638}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [19484.375, 81920.0], "load": 1.0, "temperature": 64, "power": 112.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.30767822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.737151145935059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.804368019104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 8.1632510051618, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.472871780395508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 63.78447252248442, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29784.375, 81920.0], "load": 1.0, "temperature": 68, "power": 268.172}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.446054935455322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2718138694763184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 6.460121921844693, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31678.375, 81920.0], "load": 1.0, "temperature": 67, "power": 353.479}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.4201836585998535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.216507911682129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.969565749168396}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:12], Iteration [10/200000], D/loss_real: -1.9712, D/loss_fake: -0.0702, D/loss_cls: 3.9647, D/loss_gp: 0.0046, G/loss_fake: 0.1715, G/loss_rec: 0.5520, G/loss_cls: 3.9818\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 43.7098809938662, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.4251532554626465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.116135120391846}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1205484867095947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.494114398956299}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.4104485511779785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.25422334091819, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 1.0, "temperature": 67, "power": 306.999}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.7500081062316895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.529216766357422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.7632677001104, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3130154609680176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5506622791290283}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.562422275543213}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:16], Iteration [20/200000], D/loss_real: -0.8785, D/loss_fake: -0.1888, D/loss_cls: 4.0190, D/loss_gp: 0.0611, G/loss_fake: -0.1104, G/loss_rec: 0.5343, G/loss_cls: 3.3822\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.28081547405864, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.540276288986206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5708091259002686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1140185594558716}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0282247066497803}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9975810050964355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.26363442611106, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.92, "temperature": 69, "power": 302.133}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5007104873657227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.818920135498047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.949342769618756, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9071820974349976}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.035526752471924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.477890729904175}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:19], Iteration [30/200000], D/loss_real: -1.3056, D/loss_fake: 0.0260, D/loss_cls: 3.4161, D/loss_gp: 0.0341, G/loss_fake: 0.2541, G/loss_rec: 0.5301, G/loss_cls: 3.5516\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 43.708852344121816, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8169243335723877}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6322102546691895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4694974422454834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.92, "temperature": 69, "power": 344.187}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2115354537963867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3500218391418457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.06426198402073, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8179333209991455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.616603374481201}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.90974125638834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0832951068878174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1923515796661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8228189945220947}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:23], Iteration [40/200000], D/loss_real: -6.9929, D/loss_fake: 2.5865, D/loss_cls: 4.1406, D/loss_gp: 0.3089, G/loss_fake: -0.9326, G/loss_rec: 0.5177, G/loss_cls: 3.7892\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.278624937723684, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.584784984588623}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 70, "power": 286.526}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.519535541534424}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6647734642028809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6680421829223633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7387125492095947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.198412139328966, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9076906442642212}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4922038316726685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.085480152914485, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2508779764175415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0354152917861938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7795575261116028}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:26], Iteration [50/200000], D/loss_real: -5.1997, D/loss_fake: 2.4312, D/loss_cls: 3.3287, D/loss_gp: 0.0219, G/loss_fake: -2.4973, G/loss_rec: 0.5122, G/loss_cls: 3.3806\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 70, "power": 362.707}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.49289356398541, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.344868540763855}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0880271196365356}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5831774473190308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.35530561208724976}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.19174674153327942}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.32946826484833, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.45236653089523315}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.42395657300949097}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.850689412062145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3194011151790619}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3704109787940979}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 69, "power": 243.308}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.185400128364563}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:30], Iteration [60/200000], D/loss_real: -5.9533, D/loss_fake: 2.2073, D/loss_cls: 3.3921, D/loss_gp: 0.0539, G/loss_fake: -1.8192, G/loss_rec: 0.5213, G/loss_cls: 3.4179\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.31975667026849, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9736377596855164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9071493148803711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7974754571914673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8348018527030945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8199213147163391}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.18646338429798, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5882846117019653}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 70, "power": 271.702}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2743475437164307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.857986886782857, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1959108114242554}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.281416893005371}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4402904510498047}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:33], Iteration [70/200000], D/loss_real: -5.7772, D/loss_fake: 3.2971, D/loss_cls: 3.8472, D/loss_gp: 0.0073, G/loss_fake: -3.1162, G/loss_rec: 0.5139, G/loss_cls: 3.5212\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.88718696937655, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.743876338005066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4190081357955933}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3311082124710083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8415721654891968}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0974764823913574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.25820187449798, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 72, "power": 191.813}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4530152082443237}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3708733320236206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.857532324325202, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1673988103866577}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1851694583892822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1520086526870728}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:37], Iteration [80/200000], D/loss_real: -6.5147, D/loss_fake: 3.8857, D/loss_cls: 3.3454, D/loss_gp: 0.0436, G/loss_fake: -2.9044, G/loss_rec: 0.5207, G/loss_cls: 3.4491\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.10311071207625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8016101121902466}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6080504655838013}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5227274894714355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.460348129272461}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.413737177848816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 70, "power": 140.184}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.28230094694642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1281251907348633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0212392807006836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.84127238280682, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.907827615737915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7979817390441895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7232437133789062}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:40], Iteration [90/200000], D/loss_real: -5.6525, D/loss_fake: 4.0682, D/loss_cls: 3.2199, D/loss_gp: 0.0088, G/loss_fake: -3.7273, G/loss_rec: 0.5324, G/loss_cls: 3.3752\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.47570191880807, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5169217586517334}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6483209133148193}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 73, "power": 354.153}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0749967098236084}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.560365676879883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.358938694000244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.232556966512675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.468632698059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.572534084320068}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.905067428485737, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.15188217163086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.032367706298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5272278785705566}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:44], Iteration [100/200000], D/loss_real: -4.9930, D/loss_fake: 4.1957, D/loss_cls: 3.3223, D/loss_gp: 0.0002, G/loss_fake: -6.4818, G/loss_rec: 0.5857, G/loss_cls: 3.9515", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.44815957456909, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.798272132873535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 73, "power": 312.856}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.342226028442383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.410581588745117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.419046878814697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.239133834838867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.13238800718356, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.922548294067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9156606197357178}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.234572425367293, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.242363452911377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8708302974700928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.55665922164917}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:47], Iteration [110/200000], D/loss_real: -1.8823, D/loss_fake: 0.8729, D/loss_cls: 3.3375, D/loss_gp: 0.0229, G/loss_fake: -0.1700, G/loss_rec: 0.5995, G/loss_cls: 3.4261\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 72, "power": 345.27}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.28534830879524, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.067499876022339}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8391854763031006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.815575361251831}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.909585952758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.385455369949341}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.070959222419226, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.637715339660645}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.732038736343384}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.9056872256242, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.60103702545166}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.718660831451416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 72, "power": 277.906}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.803324222564697}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:51], Iteration [120/200000], D/loss_real: -1.1665, D/loss_fake: 0.9156, D/loss_cls: 3.3276, D/loss_gp: 0.2727, G/loss_fake: -1.7524, G/loss_rec: 0.5582, G/loss_cls: 3.3954\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 41.59489060287702, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.131701469421387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4407215118408203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.925509452819824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2475171089172363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2481677532196045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.907955444812266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2094621658325195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 73, "power": 296.603}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0216121673583984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.792464180021877, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5084896087646484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.515705108642578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1535003185272217}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:54], Iteration [130/200000], D/loss_real: -2.1625, D/loss_fake: 1.0159, D/loss_cls: 3.2356, D/loss_gp: 0.0064, G/loss_fake: -0.8837, G/loss_rec: 0.5280, G/loss_cls: 3.3766\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.475841719392974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8968610763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8204174041748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.690340280532837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.663285732269287}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.546361207962036}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.003209114149826, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 73, "power": 214.897}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.7007572650909424}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2588911056518555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.82595418002522, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.000091314315796}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.060607671737671}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6705269813537598}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:58], Iteration [140/200000], D/loss_real: -0.1581, D/loss_fake: -0.3575, D/loss_cls: 3.1675, D/loss_gp: 0.0019, G/loss_fake: 0.2745, G/loss_rec: 0.5210, G/loss_cls: 3.6738\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.28028343751798, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.979808807373047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6155827045440674}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2278246879577637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.971007347106934}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.542792320251465}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 73, "power": 290.475}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.19449267166019, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.457787036895752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.6227059364318848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.676440251549852, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8770456314086914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.926919460296631}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.374927520751953}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:02], Iteration [150/200000], D/loss_real: -0.1277, D/loss_fake: -0.0097, D/loss_cls: 3.2584, D/loss_gp: 0.1254, G/loss_fake: -0.1068, G/loss_rec: 0.5090, G/loss_cls: 3.3887\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 42.4199819904906, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.305234432220459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.883572578430176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 74, "power": 360.786}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.829576253890991}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.753861427307129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3896124362945557}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.88174488851092, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9929075241088867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.811655044555664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.848302602611373, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.742161750793457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0841004848480225}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.430011034011841}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:05], Iteration [160/200000], D/loss_real: -1.1459, D/loss_fake: 0.6399, D/loss_cls: 2.9349, D/loss_gp: 0.0001, G/loss_fake: -0.8026, G/loss_rec: 0.5000, G/loss_cls: 3.3621\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.19780347544447, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7502481937408447}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 73, "power": 330.573}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6191232204437256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.495950937271118}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.35919451713562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.296478271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.98131601799165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.134523868560791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.916684150695801}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.85763386901017, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2937400341033936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.448493480682373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.991422653198242}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:09], Iteration [170/200000], D/loss_real: 0.0432, D/loss_fake: -0.1306, D/loss_cls: 3.2986, D/loss_gp: 0.5780, G/loss_fake: 0.4074, G/loss_rec: 0.5053, G/loss_cls: 3.5514\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.41092856246805, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.383971214294434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 74, "power": 297.339}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.57904052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.520970821380615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.216291427612305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756345748901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.00384868413562, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.687509536743164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9703216552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.744496412537735, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.612659454345703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1105234622955322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4925484657287598}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:12], Iteration [180/200000], D/loss_real: -0.5100, D/loss_fake: 0.2233, D/loss_cls: 3.2673, D/loss_gp: 0.0512, G/loss_fake: -0.2488, G/loss_rec: 0.5033, G/loss_cls: 3.3796", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.83863063870064, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4892184734344482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 75, "power": 284.746}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.051115036010742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9084584712982178}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.826239585876465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7394564151763916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.85476011573647, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.890897274017334}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.233086109161377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.779653656888087, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.191074371337891}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7607486248016357}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5501227378845215}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:16], Iteration [190/200000], D/loss_real: -1.2542, D/loss_fake: 0.4814, D/loss_cls: 3.1874, D/loss_gp: 0.0136, G/loss_fake: -0.0654, G/loss_rec: 0.4965, G/loss_cls: 3.4382", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.07104365631764, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9499905109405518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 75, "power": 339.611}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8428449630737305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8010976314544678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.728675127029419}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.683522939682007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.11215460762741, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.424790620803833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3125107288360596}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.785313473997743, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2594218254089355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.158982276916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.099994421005249}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:19], Iteration [200/200000], D/loss_real: -1.1347, D/loss_fake: 1.2281, D/loss_cls: 3.0020, D/loss_gp: 0.0005, G/loss_fake: -1.1583, G/loss_rec: 0.4951, G/loss_cls: 3.3663\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.603171208093215, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.99, "temperature": 74, "power": 307.957}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 75, "power": 307.957}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712676859.9005494, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/super-slomo.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/super-slomo.D0.data new file mode 100644 index 000000000..822b19247 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/super-slomo.D0.data @@ -0,0 +1,413 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 93.898, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676862.592226, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712676862.6070118}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1472.375, 81920.0], "load": 0, "temperature": 66, "power": 85.044}}}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 328.4163818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.37249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 69, "power": 281.867}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3195495605469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30255126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2887878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 322.689}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2457275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.95242832544626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2337646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2228698730469}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 250.255}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.345213325499905, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2162170410156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.03989959658556, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2085266113281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2122497558594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.65450655127688, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 270.47}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2008361816406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.23996294426446, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1998291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.303098935799746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19964599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19927978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 72, "power": 208.164}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.554826916315534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1987609863281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1982116699219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.085142392938, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19769287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.48725694132927, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19732666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 72, "power": 269.724}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.30916321232543, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1971740722656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1971740722656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.020215700374884, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19708251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 72, "power": 278.909}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.674565755766615, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1965026855469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1961975097656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.08403543784047, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.04990092261871, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.195556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.8, "temperature": 72, "power": 349.881}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1952819824219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.413592250116345, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19500732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1947021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.399123179061675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1944580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 352.325}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1941833496094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.394487354191405, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1939392089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.01164656546802, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1936340332031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1932678222656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.3638965809334, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1929626464844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 71, "power": 323.921}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1925964355469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.015577335436156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1922912597656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1918640136719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.159088357676154, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19146728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 343.841}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1910400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.899892573938864, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1907043457031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1902770996094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.93789849720355, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1897888183594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1894226074219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 72, "power": 214.789}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.571148346870054, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1888732910156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.998568289223385, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1879577636719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.1929186678538, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1874084472656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 137.558}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1868591308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.3646604774035, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.18634033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1858215332031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.15449188486926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1851806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.18463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 71, "power": 288.781}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.05505503776553, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.18402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1833801269531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.01152438741749, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.18267822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1820068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.66213516076697, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 120.897}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1812744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1805725097656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.78716917503622, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1797790527344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.440795190489276, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.17901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1781921386719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 318.751}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.564077864799515, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1773376464844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1764221191406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.16520140658756, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.17547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.17449951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.99343495765075, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 72, "power": 274.147}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1734924316406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.17236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.021243821053304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1712341308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.16998291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.139688971944864, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.16864013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.92, "temperature": 71, "power": 342.8}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.16717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.971210249211026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1656188964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1643371582031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.67270729153136, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.16314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.55277532206516, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1614990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 70, "power": 345.784}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.15972900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.73053203831094, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.15826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.15673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.92390399191576, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.155029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 71, "power": 321.123}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.15313720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.12009119209723, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1511535644531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.21162538382369, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.14727783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 72, "power": 273.076}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1451416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.172298855262675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1429748535156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1409912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.12205634920386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1388244628906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.13653564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.941677849111656, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 337.382}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1343688964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.13226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.065031046065336, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1299133300781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.12762451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.17541851756057, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1253356933594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.82, "temperature": 71, "power": 287.584}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1228332519531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.21950894231256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1202392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.11767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.882850672046466, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1148986816406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 70, "power": 234.339}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.112548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.37813428422003, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.11077880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.77106690130039, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1075134277344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1042785644531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.585897562974274, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.0984191894531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.85, "temperature": 70, "power": 325.922}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.09539794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.973770243584774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.0878601074219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.09539794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.97335999696094, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.0661926269531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 70, "power": 275.553}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.0711669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.999441187342796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.0438232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 327.9986572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.088683123559875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.14361572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 327.9217834472656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.84, "temperature": 71, "power": 342.523}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.0834971570833, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 327.8737487792969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 327.81097412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.88767464186728, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 72, "power": 299.591}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712676951.8056214, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/t5.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/t5.D0.data new file mode 100644 index 000000000..fae8cc20b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/t5.D0.data @@ -0,0 +1,594 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 95.036, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675932.071199, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712675932.0878217}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.970378875732422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.800580024719238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.727293014526367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.682456970214844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 72, "power": 303.848}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.654926300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.628446578979492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.61573600769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.587346076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.556131362915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.513788223266602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.457639694213867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.321690559387207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.367353439331055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 291.175}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.313187599182129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.28883171081543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.212865829467773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.028278350830078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.84654426574707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.015433311462402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.654990196228027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.873329070439546, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.576047897338867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.431913375854492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35800552368164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 277.294}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.38845385119996, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.306499481201172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.0736665725708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.939047813415527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.910099029541016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.83302834518841, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.83672046661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.653719902038574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.481781959533691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.33133987805137, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.48547649383545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.421192169189453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 73, "power": 301.319}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.146132469177246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.03295208579149, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.017644882202148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.908407211303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.934696197509766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.747555732727051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.88356256846136, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.655747413635254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.503643035888672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.347489356994629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.420005184656176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.31602668762207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 74, "power": 307.915}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231689929962158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05312442779541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.2710024827676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9287261962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817322731018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.710197448730469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.60150671005249}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.6379369989259, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.518242359161377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.411893844604492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.325069427490234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.07091476038458, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 283.329}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.233058929443359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.172609329223633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.097485542297363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.53793069564524, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.041872501373291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.932075023651123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.845073699951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.775830268859863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.926166378110764, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.705239295959473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.633525848388672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.575502395629883}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 73, "power": 306.652}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.41243110053213, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5167365074157715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.423802852630615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.354887962341309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.284927862355396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.272907257080078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.22577428817749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.148532390594482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.052487373352051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.947208336631746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.000744819641113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.897304534912109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 74, "power": 301.032}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.836282253265381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.24466579795221, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.77700138092041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.716606140136719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.660195827484131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.60576117225442, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.5612311363220215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.502846717834473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.482702732086182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.28325331765928, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.492550373077393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.360192775726318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 307.094}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2910284996032715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2590203285217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.34118971084908, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.185894012451172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.096889972686768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.042828559875488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.680101739309706, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9600980281829834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8612828254699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.813199996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.026802334236464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.774353265762329}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 305.224}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.680110216140747}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.641136646270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5967588424682617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.82762326112051, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.522066354751587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.439159870147705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3564889430999756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.2232733121753, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3717949390411377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.323021650314331}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2349772453308105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.9382205291193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.192401647567749}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 279.68}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1241087913513184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0823092460632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.15316213489718, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.988307476043701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9341442584991455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8254554271698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.76162052154541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.12555480501483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.763589859008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7407455444335938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.672053337097168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.01375490180158, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 74, "power": 321.991}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.585357189178467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5391626358032227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4440035820007324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.05469407280205, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3648884296417236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2605745792388916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2033910751342773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.71019278322205, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1607017517089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.115994691848755}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.066122055053711}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.99, "temperature": 74, "power": 222.248}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.993964672088623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.919008562112154, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0019028186798096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9345535039901733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8910869359970093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.29229127836533, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.861147165298462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8138902187347412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7461824417114258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.31803581192987, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6692562103271484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.625261664390564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5834534168243408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 308.159}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5272022485733032}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.15468797763228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.459859013557434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4157166481018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3812469244003296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.22884443079776, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3119561672210693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2564345598220825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2277307510375977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.39756027753377, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1866565942764282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1411606073379517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 74, "power": 316.928}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.083279013633728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0341026782989502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.317078002605534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9987545013427734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9641054272651672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9365309476852417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.978423899375215, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8940635919570923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8575931787490845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8317781686782837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.51650027053173, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7994922995567322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 295.165}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7648830413818359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7411973476409912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7056658267974854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.21757259833375, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6834872364997864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6573166847229004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6341385245323181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.94609616157135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6199974417686462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5888203978538513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5731779932975769}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.45876604188728, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5541250109672546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 73, "power": 235.624}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.536582350730896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5135989785194397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4957291781902313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.356384720459, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47861233353614807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47117674350738525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4504289925098419}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.09482749925266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.43357139825820923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4255506992340088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.41500476002693176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.99, "temperature": 74, "power": 322.22}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.61582093861026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4001453220844269}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3856187164783478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.37681058049201965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3642743229866028}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.828865144295065, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3536361753940582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.34521564841270447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3393672704696655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.59646329940463, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3293442130088806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.32399898767471313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 318.339}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3113195300102234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.32528124105802, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3049860894680023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2919404208660126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.28530269861221313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2786383628845215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.85032510262565, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.27610689401626587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2684989273548126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2630639374256134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.80205452664288, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2509020268917084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 275.278}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24854587018489838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24656544625759125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.653547369082496, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23619654774665833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23337531089782715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22808784246444702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22517065703868866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.65655951714193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21650603413581848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21727493405342102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21555562317371368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.03530104738778, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20680122077465057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 73, "power": 302.592}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20380568504333496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2012479454278946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.39712061722047, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1972467303276062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1940830945968628}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1896078884601593}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18643631041049957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.788288156898325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18215446174144745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17853213846683502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17645688354969025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.762814408016716, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 73, "power": 298.513}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1725698709487915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16997073590755463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16654980182647705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.66390327673736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1624709963798523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16435472667217255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16150528192520142}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15788640081882477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.55761395849358, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15364141762256622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15300226211547852}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1467810720205307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 287.523}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.27666697868578, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14508351683616638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14274908602237701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14380408823490143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.55307456855578, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14112578332424164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1418134719133377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13803264498710632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13522985577583313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.14215306674722, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13362157344818115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13214538991451263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 73, "power": 311.943}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.12823574244976044}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.356207749274006, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 292.001}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712676007.435871, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/tf32.D0.data new file mode 100644 index 000000000..b2a9cfa54 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/tf32.D0.data @@ -0,0 +1,124 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 88.921, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712675183.871066, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712675183.8814704}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 119.82108803571462, "units": "Tflops", "t": 1712675185.763562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 63, "power": 53.84}}, "t": 1712675185.2099228}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0, "temperature": 67, "power": 84.102}}, "t": 1712675185.72049}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.95872665415837, "units": "Tflops", "t": 1712675185.9259768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.96634274953712, "units": "Tflops", "t": 1712675186.0877821}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.9641380026, "units": "Tflops", "t": 1712675186.249582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 306.71}}, "t": 1712675186.230118}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.9585262421178, "units": "Tflops", "t": 1712675186.4114509}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 130.14971675064663, "units": "Tflops", "t": 1712675186.58048}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.68038931201184, "units": "Tflops", "t": 1712675186.7450423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 278.15}}, "t": 1712675186.7397413}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.70027057049646, "units": "Tflops", "t": 1712675186.9083984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.11821505321117, "units": "Tflops", "t": 1712675187.071198}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.94730411075176, "units": "Tflops", "t": 1712675187.233014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.4756517470965, "units": "Tflops", "t": 1712675187.3954093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 307.084}}, "t": 1712675187.2479062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.45148102281996, "units": "Tflops", "t": 1712675187.562819}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.84819497040706, "units": "Tflops", "t": 1712675187.7271638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.35101792471193, "units": "Tflops", "t": 1712675187.8909004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 305.621}}, "t": 1712675187.7566288}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.3891903877616, "units": "Tflops", "t": 1712675188.0546324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.06538518887268, "units": "Tflops", "t": 1712675188.2174945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.9884897332797, "units": "Tflops", "t": 1712675188.3804538}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 298.599}}, "t": 1712675188.2648993}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.0813397610628, "units": "Tflops", "t": 1712675188.5470676}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.88666524682716, "units": "Tflops", "t": 1712675188.7113686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.80894729398764, "units": "Tflops", "t": 1712675188.8745492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 300.259}}, "t": 1712675188.7728112}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.393890036264, "units": "Tflops", "t": 1712675189.0383008}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.77014218538258, "units": "Tflops", "t": 1712675189.2015202}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.6729321224225, "units": "Tflops", "t": 1712675189.364866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 294.502}}, "t": 1712675189.2808669}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.02726664416596, "units": "Tflops", "t": 1712675189.5315478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.34690833745708, "units": "Tflops", "t": 1712675189.6952958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.42189892392165, "units": "Tflops", "t": 1712675189.858941}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 300.544}}, "t": 1712675189.7879486}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.43365438192274, "units": "Tflops", "t": 1712675190.022643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.42248664799396, "units": "Tflops", "t": 1712675190.1862833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.99461656036888, "units": "Tflops", "t": 1712675190.3504543}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 300.45}}, "t": 1712675190.299112}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.81912870904057, "units": "Tflops", "t": 1712675190.516121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.46567028746526, "units": "Tflops", "t": 1712675190.6809378}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.12870679666867, "units": "Tflops", "t": 1712675190.843727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 299.678}}, "t": 1712675190.8088624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.40642403938585, "units": "Tflops", "t": 1712675191.0074625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.42013578254014, "units": "Tflops", "t": 1712675191.1711073}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.78878437208658, "units": "Tflops", "t": 1712675191.3355236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 293.455}}, "t": 1712675191.319184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.28880489801463, "units": "Tflops", "t": 1712675191.5006225}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24682732576295, "units": "Tflops", "t": 1712675191.6657078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.40054843433649, "units": "Tflops", "t": 1712675191.8293788}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 301.547}}, "t": 1712675191.827707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.4107331429359, "units": "Tflops", "t": 1712675191.9931095}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.40936203456315, "units": "Tflops", "t": 1712675192.1567693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.7761712645171, "units": "Tflops", "t": 1712675192.3212016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.76938057622758, "units": "Tflops", "t": 1712675192.485639}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 299.373}}, "t": 1712675192.3361712}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.09915345332087, "units": "Tflops", "t": 1712675192.650957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.52750047202196, "units": "Tflops", "t": 1712675192.8156939}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.9276680069864, "units": "Tflops", "t": 1712675192.9787292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 295.296}}, "t": 1712675192.8448856}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.3940858620848, "units": "Tflops", "t": 1712675193.1424537}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.01213858436083, "units": "Tflops", "t": 1712675193.3065941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.80178806487413, "units": "Tflops", "t": 1712675193.471006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 302.023}}, "t": 1712675193.3531234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.08186934543582, "units": "Tflops", "t": 1712675193.636383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.04309246511096, "units": "Tflops", "t": 1712675193.8017206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.6731287622746, "units": "Tflops", "t": 1712675193.9650648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 289.344}}, "t": 1712675193.8612835}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.14662649830453, "units": "Tflops", "t": 1712675194.129089}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.4187644823312, "units": "Tflops", "t": 1712675194.292732}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.20791765461772, "units": "Tflops", "t": 1712675194.4566343}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 301.428}}, "t": 1712675194.3701258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.28877802191255, "units": "Tflops", "t": 1712675194.6229794}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.3829247010239, "units": "Tflops", "t": 1712675194.786672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.39271509332326, "units": "Tflops", "t": 1712675194.9503572}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 301.944}}, "t": 1712675194.878238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.3862532743548, "units": "Tflops", "t": 1712675195.1140943}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.40133181866207, "units": "Tflops", "t": 1712675195.277762}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.6186110607174, "units": "Tflops", "t": 1712675195.442394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 302.297}}, "t": 1712675195.3872194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.82295408417747, "units": "Tflops", "t": 1712675195.6080523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.490975095447, "units": "Tflops", "t": 1712675195.7728329}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.50697644735038, "units": "Tflops", "t": 1712675195.9363792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 302.039}}, "t": 1712675195.8942864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.41406309401864, "units": "Tflops", "t": 1712675196.1000834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.41758910442084, "units": "Tflops", "t": 1712675196.2637289}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.46509090041204, "units": "Tflops", "t": 1712675196.428542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 301.142}}, "t": 1712675196.401601}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.04194102215558, "units": "Tflops", "t": 1712675196.5939295}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.71042758436153, "units": "Tflops", "t": 1712675196.758484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.115806892294, "units": "Tflops", "t": 1712675196.9225116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 300.544}}, "t": 1712675196.908676}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.43796523170082, "units": "Tflops", "t": 1712675197.0861912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.41758910442084, "units": "Tflops", "t": 1712675197.2498488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.4255113645767, "units": "Tflops", "t": 1712675197.4147308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.46895357578722, "units": "Tflops", "t": 1712675197.57955}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 296.364}}, "t": 1712675197.4188232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.35278488104964, "units": "Tflops", "t": 1712675197.7445629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.98157536184715, "units": "Tflops", "t": 1712675197.9087517}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.37705115030707, "units": "Tflops", "t": 1712675198.0724497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 292.759}}, "t": 1712675197.92592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.41896038064806, "units": "Tflops", "t": 1712675198.236146}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.58486263156286, "units": "Tflops", "t": 1712675198.4020636}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.7323349633643, "units": "Tflops", "t": 1712675198.5665488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 301.338}}, "t": 1712675198.4342585}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.5284670232124, "units": "Tflops", "t": 1712675198.7313554}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.09728293276035, "units": "Tflops", "t": 1712675198.895407}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.42542534544725, "units": "Tflops", "t": 1712675199.0590441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 300.066}}, "t": 1712675198.9441137}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.41700142317387, "units": "Tflops", "t": 1712675199.2227612}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.84189293652963, "units": "Tflops", "t": 1712675199.3883488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.62596722948862, "units": "Tflops", "t": 1712675199.552972}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 294.421}}, "t": 1712675199.451834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.75192197159427, "units": "Tflops", "t": 1712675199.7174885}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.4025069122736, "units": "Tflops", "t": 1712675199.8811624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.42542534544725, "units": "Tflops", "t": 1712675200.0447993}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 302.109}}, "t": 1712675199.960947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.03297628766703, "units": "Tflops", "t": 1712675200.208963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.8336663069291, "units": "Tflops", "t": 1712675200.374561}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712675200.904881, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/whisper.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/whisper.D0.data new file mode 100644 index 000000000..5154127e2 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/suvojova.2024-04-09_15:00:22.247229/whisper.D0.data @@ -0,0 +1,686 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}, {"aliaslist": [], "hostname": "majorpossum.eastus2.cloudapp.azure.com", "ip": "majorpossum.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.70.216.57"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": false, "main": false, "name": "node1", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "realbird.eastus2.cloudapp.azure.com", "ip": "realbird.eastus2.cloudapp.azure.com", "ipaddrlist": ["10.0.1.5", "00:00:00:00:00:00", "fe80::6245:bdff:fe7a:73fc%eth0", "127.0.0.1", "::1", "60:45:bd:7a:73:fc"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100-bb56eeb281e608548ee46dd1607660a7/id_rsa.covalent.realbird.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "suvojova.2024-04-09_15:00:22.247229", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "f837b72a6bbb71a07eed38e1458b35b0", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "realbird", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-e5df2666-9a32-cf33-2409-1971e6e097bd": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.12, "memory": 0.010771942138671876}, "temperature": 70, "power": 92.936, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712676087.977338, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712676087.993317}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 65, "power": 83.602}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 64, "power": 82.134}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6992721557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 67, "power": 306.675}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4653310775756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6924057006835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7043609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6987991333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6994171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6975479125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6967544555664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6952743530273438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 68, "power": 271.266}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6956329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6933135986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918106079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918716430664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938247680664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926803588867188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926116943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 194.59008150167645, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 68, "power": 281.109}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917190551757812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915664672851562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.6032655613261, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919631958007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.45600544984165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913375854492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 68, "power": 271.651}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.98477548857727, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691558837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691375732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.22062880944472, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69146728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913833618164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.53768433985104, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912765502929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 69, "power": 275.706}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.46982863266754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.37956075671178, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.15113423319224, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 69, "power": 287.885}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.42771555081964, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 219.03502194259482, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.41117492108293, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 69, "power": 300.67}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.26172770672048, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.38623520209012, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.94, "temperature": 69, "power": 260.007}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.49085624216082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.38639967794404, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.81326038603885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 70, "power": 300.074}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.05690323168616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.71879460552685, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.84070902759936, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 70, "power": 194.772}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.486364505475, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.9007691507313, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.33322495419878, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 69, "power": 337.765}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.5422934042436, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.6567198611301, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 71, "power": 284.747}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.65021365637475, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.91362457898978, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.01888494013326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 71, "power": 296.199}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.15301708300626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.4507517197738, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.70917008917385, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 71, "power": 335.295}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.92574688554782, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.8860032417019, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909866333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.61840142031096, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908340454101562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 71, "power": 323.984}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919326782226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914596557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.10495331243632, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910934448242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914291381835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69134521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.7395878417548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910018920898438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.14622901655156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 71, "power": 291.993}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.77195230995713, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.15760175623498, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909942626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 71, "power": 287.154}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.70981730188274, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906585693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.4180615765744, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905288696289062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909103393554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955337524414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.30561588658338, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7002067565917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.697906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913986206054688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 71, "power": 279.204}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6951828002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.8360031057285, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938323974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6948165893554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.89506571393082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923904418945312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693206787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6921310424804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.93896483078063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 71, "power": 275.55}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915359497070312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6916046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.40159806241908, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6903610229492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897354125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.89011692118152, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897735595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6895599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6880340576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6874923706054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.36152364104214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6850967407226562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 71, "power": 281.133}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6821975708007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.67584228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6980819702148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.0677480579282, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.832855224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7104606628417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.708587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [219, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919479370117188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.82187975268425, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [220, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6960067749023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [221, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6996231079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [222, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6961898803710938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [223, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 70, "power": 274.368}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 219.0335348735325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [224, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [225, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6962814331054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [226, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [227, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918563842773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 219.06039930677002, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [228, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [229, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693878173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [230, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6942672729492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [231, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923370361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 219.1120193551575, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [232, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [233, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [234, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6931381225585938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 1.0, "temperature": 71, "power": 341.315}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [235, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926040649414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 219.4664603691084, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [236, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [237, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [238, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917572021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [239, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69232177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 219.17055946256093, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [240, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [241, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [242, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [243, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.7534654212616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [244, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [245, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.93, "temperature": 71, "power": 153.658}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [246, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913909912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [247, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.31125287698097, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [248, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [249, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914443969726562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [250, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [251, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913299560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.9156551758609, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [252, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [253, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [254, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [255, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.29472864959027, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [256, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 71, "power": 332.754}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [257, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [258, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [259, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.6926330407727, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [260, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [261, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [262, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [263, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 218.51463900797364, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 72, "power": 265.762}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712676171.0225024, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp16.D0.data new file mode 100644 index 000000000..c2a1cc47b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp16.D0.data @@ -0,0 +1,444 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 1.0, "memory": 0.010771942138671876}, "temperature": 69, "power": 99.362, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 109.825, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628207.232692, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628210.3326516}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [22703.25, 81920.0], "load": 0.02, "temperature": 68, "power": 358.003}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 69, "power": 285.045}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.66180903628288, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 69, "power": 360.061}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.65791437127126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.9437238244354, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.31229296170818, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 70, "power": 263.752}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.76234774644922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.67440354114072, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.31835499014406, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 71, "power": 276.724}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.49599392455005, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.83724397614478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.41467296800886, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 71, "power": 334.475}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.41142674162953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.42956770832046, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.36509040283352, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 336.647}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.46061913803658, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.21932885774373, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.06611917900594, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 269.762}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.59991476044826, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.2718793978005, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.09597488421537, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 293.52}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.49630591814523, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.43694284741625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.23189448517115, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 344.329}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.61455671388416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.7430219756439, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.3088760976202, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 316.68}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.14693599684963, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.0691633601467, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.9756054835532, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 248.27}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.8626008628341, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.21999695081573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.98456415109965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 252.906}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628253.5679116, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp16.D1.data new file mode 100644 index 000000000..afdbdae1d --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp16.D1.data @@ -0,0 +1,444 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp16", "tag": ["bert-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 94.666, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 107.957, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628210.314224, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628210.3408854}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [22703.25, 81920.0], "load": 0.01, "temperature": 69, "power": 312.484}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.99, "temperature": 71, "power": 297.093}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.56403867179904, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 365.972}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.56005489033487, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.23099157490228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.56037447681837, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 258.673}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.67997285172007, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.44095590774205, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.7092667560305, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 264.696}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.60232555543456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.57560255220363, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.20652738085022, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 329.269}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.24501415362292, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.09838876406843, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.30443050390224, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 259.328}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.34742386451518, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.01502761790505, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.66714508673834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 310.496}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.13814482915606, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.37735308659367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.47828844863437, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 268.94}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.26339990786025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.28657327532528, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.38817903215678, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 75, "power": 264.259}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.820216802315, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.10151205185286, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.2486948959535, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 256.002}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.23357089017136, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.1732091200342, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.82914608530496, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 75, "power": 279.826}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.5425258033686, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.01111664187604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.59527449362847, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 75, "power": 290.145}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628253.4846983, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp32.D0.data new file mode 100644 index 000000000..f1721326c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp32.D0.data @@ -0,0 +1,226 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 64, "power": 91.125, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 64, "power": 97.037, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628137.66085, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628140.763577}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479292869567871}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29667.25, 81920.0], "load": 0.33, "temperature": 66, "power": 289.631}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323901176452637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044936180114746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.83090591430664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708606719970703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 67, "power": 269.364}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565232276916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491073608398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468774795532227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.4805908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 21.589878417254138, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515676498413086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 68, "power": 289.658}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.562061309814453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.504788431100227, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608406066894531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640921592712402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.50085981545638, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.648000717163086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 68, "power": 291.498}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.63291072845459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.47590684630879, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614981651306152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622065544128418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.473430100185873, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669747352600098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 69, "power": 292.475}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752553939819336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.365653986231177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848938941955566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.93950080871582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.426560904933893, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007094383239746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.038527488708496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.343916401683646, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 0.97, "temperature": 70, "power": 291.804}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.016993522644043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.91185188293457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.357061636622113, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.875959396362305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.918696403503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.328927601330182, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 71, "power": 304.244}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.017876625061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.246811866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.328203004659226, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.431685447692871}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.560530662536621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.300892040224223, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.64004135131836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 71, "power": 297.632}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.689648628234863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.33322906076584, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.736746788024902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.803998947143555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.27631517752865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.90011978149414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 72, "power": 283.909}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.020580291748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.317724485249173, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.152454376220703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.279325485229492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.30375809852236, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.385612487792969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 72, "power": 305.018}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.46435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.290358169858198, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.513260841369629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.550541877746582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.282640144872286, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.602174758911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.679553985595703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.277299219299575, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 0.98, "temperature": 73, "power": 298.87}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.78598690032959}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.926142692565918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.279959182156478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.089705467224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.263202667236328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.279221959489973, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.435498237609863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.611}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.600459098815918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.25997266839324, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.752622604370117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.883284568786621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.27689019499258, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.991811752319336}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 74, "power": 310.544}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.081356048583984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.2605440593021, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.160579681396484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.241079330444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.2313704488936, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.328137397766113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 74, "power": 301.127}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.415624618530273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.219262204247958, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.503317832946777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.589831352233887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.239161359576446, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.672152519226074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 74, "power": 313.067}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.75645637512207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.201785553085536, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.840280532836914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.907486915588379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.21330375760299, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.961742401123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.999088287353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.190551185577032, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 75, "power": 285.079}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.025399208068848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.040262222290039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.168059302589274, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 75, "power": 303.454}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628203.5527153, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp32.D1.data new file mode 100644 index 000000000..29ca7fd15 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-fp32.D1.data @@ -0,0 +1,226 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "fp32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-fp32", "tag": ["bert-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 62, "power": 88.938, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 63, "power": 100.175, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628140.745648, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628140.7719352}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479292869567871}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [29667.25, 81920.0], "load": 0, "temperature": 65, "power": 306.139}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323901176452637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044936180114746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.83090591430664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708606719970703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 68, "power": 311.882}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565232276916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491073608398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468774795532227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.4805908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.375327843343932, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515676498413086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 69, "power": 293.972}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.562061309814453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.4111615934622, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608406066894531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640921592712402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.336654699860983, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.648000717163086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 70, "power": 299.513}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.63291072845459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.33274908898837, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614981651306152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622065544128418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.328897504824855, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669747352600098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 71, "power": 293.492}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752553939819336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.265014542315242, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848938941955566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.93950080871582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.262206895515526, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007094383239746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.038527488708496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 71, "power": 283.642}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.21447859723688, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.016993522644043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.91185188293457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.217338971136897, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.875959396362305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.918696403503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.152585259584402, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 72, "power": 302.153}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.017876625061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.246811866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.191158382226188, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.431685447692871}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.560530662536621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.134316435056196, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.64004135131836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 73, "power": 288.026}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.689648628234863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.17095675985794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.736746788024902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.803998947143555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.160271591800885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.90011978149414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 73, "power": 294.944}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.020580291748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.193150049581487, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.152454376220703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.279325485229492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.15657851697573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.385612487792969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 74, "power": 292.613}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.46435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.158852000163623, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.513260841369629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.550541877746582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.12747825753281, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.602174758911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.679553985595703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 74, "power": 330.012}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.098315382372572, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.78598690032959}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.926142692565918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.136372006662384, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.089705467224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.263202667236328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.121329458651946, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 75, "power": 303.715}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.435498237609863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.600459098815918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.09547282311756, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.752622604370117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.883284568786621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.115748852651212, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 75, "power": 301.283}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.991811752319336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.081356048583984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.086662191535307, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.160579681396484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.241079330444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.05614430566372, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.328137397766113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 75, "power": 305.75}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.415624618530273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.021121964769144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.503317832946777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.589831352233887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.023584818026922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.672152519226074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 0.98, "temperature": 76, "power": 313.312}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.75645637512207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.031529180722647, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.840280532836914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.907486915588379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.999722770643427, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.961742401123047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 76, "power": 306.124}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.999088287353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.008686999390587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.025399208068848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.040262222290039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.97984955849557, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31577.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.75}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "fp32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628204.2952192, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32-fp16.D0.data new file mode 100644 index 000000000..600a0c052 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32-fp16.D0.data @@ -0,0 +1,445 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 99.388, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 111.425, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628306.67822, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628309.7467341}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.16, "temperature": 67, "power": 296.844}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 69, "power": 320.056}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 153.21231175020208, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 70, "power": 276.296}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.742542686138, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.08639049248956, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.2607009269082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 71, "power": 289.35}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.34716345814527, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.3495028265196, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.39336187683367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 71, "power": 343.5}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.98058877530931, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.32470612830173, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.8870698507736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 329.393}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.17027168948428, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.9621998120965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.23213750866972, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 71, "power": 265.927}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.53585892270584, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.15270960087696, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.35099994363586, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 247.502}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.2598682630164, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.80759451463214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.18770743268638, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 294.429}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.71210203016122, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.04959374269535, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.782274181026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 351.858}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.2793696572628, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.13857980417941, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.02121668057552, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 270.722}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.08028574833327, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.0913251336403, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.07735878230935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 268.682}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.06058081183278, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.96033201440693, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.0073844679137, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 291.315}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 291.315}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628352.5477376, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32-fp16.D1.data new file mode 100644 index 000000000..f5af78dde --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32-fp16.D1.data @@ -0,0 +1,444 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 3.0, "name": "bert-tf32-fp16", "tag": ["bert-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 94.752, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 104.337, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628309.729009, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628309.7550993}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.9, "temperature": 69, "power": 233.698}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 306.835}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.23596210127, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 72, "power": 280.996}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.5068470488006, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.13294710173292, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.6214582273411, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 329.371}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.13502173927736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.55376402212676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.1620299700469, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 328.474}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.66174626328598, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.3029980369605, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.08313115250354, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 73, "power": 269.263}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.05088926937825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.24998325794405, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.37487017036574, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 74, "power": 263.148}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.40187881008026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.1067875449593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.2607571352176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 75, "power": 284.377}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 147.91953139033285, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.2035452611117, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 147.96303050466133, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 75, "power": 288.447}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.535638156127, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.13896133789393, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 147.74718185940304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.97, "temperature": 75, "power": 355.104}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.22550086098497, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.33676246208432, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.4456561972643, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 75, "power": 350.695}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.4215033144502, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.45865519888875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.1565788062103, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 76, "power": 324.496}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.47080131927814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 148.35045214876658, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 147.96831663562514, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24613.25, 81920.0], "load": 0.98, "temperature": 75, "power": 275.303}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628352.433539, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32.D0.data new file mode 100644 index 000000000..748d57115 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32.D0.data @@ -0,0 +1,372 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 96.632, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.05, "memory": 0.010771942138671876}, "temperature": 71, "power": 109.043, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628256.737688, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628259.7974958}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479286193847656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323932647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.79, "temperature": 68, "power": 272.435}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044816970825195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.830974578857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708649635314941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565240859985352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491065979003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468748092651367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.480533599853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515584945678711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.561928749084473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608244895935059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640739440917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.647823333740234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.632768630981445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614849090576172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.99, "temperature": 69, "power": 291.294}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.621868133544922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669413566589355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752106666564941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848526000976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.939350128173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007367134094238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.03938102722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.01879596710205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.96945139852745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.915424346923828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.861414909362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.006791114807129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 70, "power": 306.848}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.027995109558105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.87728831845011, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.285082817077637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.481186866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.615607261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.69710636138916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.29958575110786, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.744179725646973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.784246444702148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.841986656188965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.928215026855469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.31679018237116, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.041173934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.170112609863281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.299582481384277}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.414661407470703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 1.0, "temperature": 70, "power": 294.823}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.68376186066297, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.506692886352539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.572625160217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.624845504760742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.687198638916016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.54803275808129, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.77263069152832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.880777359008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.014769554138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.169602394104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.78459931013593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.335892677307129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.508112907409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.685002326965332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 71, "power": 293.521}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.864999771118164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.31250636499135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.042545318603516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.21193790435791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.368173599243164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.505072593688965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.94537047503101, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.626041412353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.737335205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.839637756347656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.941758155822754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.88265739526524, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.044066429138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.145570755004883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.23983097076416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 72, "power": 314.93}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.333688735961914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.3401089382177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.425797462463379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.513972282409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.597168922424316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.675834655761719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.2633754956632, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.757519721984863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.833535194396973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.900604248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.956380844116211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.2846902031146, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.990116119384766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.019303321838379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.04137897491455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 72, "power": 299.288}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.049532890319824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.03363194619699, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.041813850402832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.022936820983887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.001933097839355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.978167533874512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.67715823473922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.95536994934082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.924501419067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.874979019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.806092262268066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.40626079181186, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.719812393188477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.609824180603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.481300354003906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 72, "power": 292.315}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.328900337219238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.91411140138683, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.169635772705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.010271072387695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.85876178741455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.748523712158203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.3977730328521, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.666166305541992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.625612258911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.650134086608887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.727069854736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.39927177326527, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.84864330291748}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.988987922668457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.99, "temperature": 73, "power": 312.335}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.1289644241333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.244298934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.91866590306037, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.312968254089355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.326717376708984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.288104057312012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.236703872680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.36328442675625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.258355140686035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.536263465881348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.127537727355957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.74303913116455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.51925709415204, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.15192413330078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.16707420349121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 73, "power": 310.633}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.868474960327148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.518776893615723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.14128693314764, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.283734321594238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.241960525512695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.301491737365723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.411982536315918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.72374131029562, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.539347648620605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.672064781188965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.800228118896484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.920104026794434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.83453652669364, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.03019905090332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.13632583618164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.513}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.23299217224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.31147003173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.08996156126238, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.37289810180664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.41785430908203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.443866729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45807647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.55560376090949, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.459491729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45636749267578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.44569206237793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.440540313720703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.30174850274787, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.448402404785156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.4744815826416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 74, "power": 287.297}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.53107261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.61451530456543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.87031112944881, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.71721076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.82810401916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.940759658813477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.044357299804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.67702172195985, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.134565353393555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.20937156677246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.272945404052734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.317352294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.86275944022186, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 74, "power": 287.596}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628303.488447, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32.D1.data new file mode 100644 index 000000000..89d28d2e7 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bert-tf32.D1.data @@ -0,0 +1,372 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32", "--num-workers": 8, "--model": "Bert", "--batch-size": 32}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "weight": 0.0, "name": "bert-tf32", "tag": ["bert-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 94.299, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 103.555, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628259.778706, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628259.8053145}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479286193847656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323932647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.95, "temperature": 70, "power": 320.955}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044816970825195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.830974578857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708649635314941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565240859985352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491065979003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468748092651367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.480533599853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515584945678711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.561928749084473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608244895935059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640739440917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.647823333740234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.632768630981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 72, "power": 306.51}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614849090576172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.621868133544922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669413566589355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752106666564941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848526000976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.939350128173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007367134094238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.03938102722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.01879596710205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 112.01449459158928, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.915424346923828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.861414909362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.006791114807129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 1.0, "temperature": 72, "power": 263.556}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.027995109558105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.98140652304082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.285082817077637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.481186866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.615607261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.69710636138916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.21623379726836, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.744179725646973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.784246444702148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.841986656188965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.928215026855469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.05993762929934, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.041173934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.170112609863281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.299582481384277}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 73, "power": 302.317}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.414661407470703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.38490172343246, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.506692886352539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.572625160217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.624845504760742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.687198638916016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.80379840708437, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.77263069152832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.880777359008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.014769554138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.169602394104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.18652018228916, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.335892677307129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.508112907409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.685002326965332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.99, "temperature": 73, "power": 304.929}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.864999771118164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.09758542035867, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.042545318603516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.21193790435791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.368173599243164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.505072593688965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.9738592155136, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.626041412353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.737335205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.839637756347656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.941758155822754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.57523013817932, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.044066429138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.145570755004883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.23983097076416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 74, "power": 237.623}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.333688735961914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.95916632958198, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.425797462463379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.513972282409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.597168922424316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.675834655761719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.98392423984386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.757519721984863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.833535194396973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.900604248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.956380844116211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.63719462852642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.990116119384766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.019303321838379}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 74, "power": 291.828}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.04137897491455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.049532890319824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.75454060691347, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.041813850402832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.022936820983887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.001933097839355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.978167533874512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.00938261947304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.95536994934082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.924501419067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.874979019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.806092262268066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.90301196261757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.719812393188477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.609824180603027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.99, "temperature": 75, "power": 307.412}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.481300354003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.328900337219238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.78103717465335, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.169635772705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.010271072387695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.85876178741455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.748523712158203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.94682493194593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.666166305541992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.625612258911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.650134086608887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.727069854736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.66599691878832, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.84864330291748}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.988987922668457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 74, "power": 294.811}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.1289644241333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.244298934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.23567531883506, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.312968254089355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.326717376708984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.288104057312012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.236703872680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.24463852813905, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.258355140686035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.536263465881348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.127537727355957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.74303913116455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.24612877708154, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.15192413330078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.16707420349121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.99, "temperature": 75, "power": 311.09}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.868474960327148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.518776893615723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.33925098813852, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.283734321594238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.241960525512695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.301491737365723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.411982536315918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 113.57915882348594, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.539347648620605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.672064781188965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.800228118896484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.920104026794434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 114.71414560401585, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.03019905090332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 76, "power": 287.736}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.13632583618164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.23299217224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.31147003173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.96221723024406, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.37289810180664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.41785430908203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.443866729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45807647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.24748546925957, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.459491729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45636749267578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.44569206237793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.440540313720703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.53841792830082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.448402404785156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.98, "temperature": 75, "power": 303.959}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.4744815826416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.53107261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.61451530456543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.46876912157863, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.71721076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.82810401916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.940759658813477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.044357299804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.72881471392024, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.134565353393555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.20937156677246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.272945404052734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.317352294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.47632085080566, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31579.25, 81920.0], "load": 0.99, "temperature": 76, "power": 300.284}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "-m", "bench", "--precision", "tf32", "--num-workers", "8", "--model", "Bert", "--batch-size", "32"], "time": 1712628303.4029007, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bf16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bf16.D0.data new file mode 100644 index 000000000..c13e95d39 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bf16.D0.data @@ -0,0 +1,105 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 97.925, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 108.849, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627476.902421, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712627479.696159}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 182.48564661485815, "units": "Tflops", "t": 1712627481.511671}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 64, "power": 91.222}}, "t": 1712627481.0195913}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.6265166419043, "units": "Tflops", "t": 1712627481.594187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 0.07, "temperature": 64, "power": 379.561}}, "t": 1712627481.5344098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.44889640902505, "units": "Tflops", "t": 1712627481.675306}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.50403094528855, "units": "Tflops", "t": 1712627481.7563508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.45368982758623, "units": "Tflops", "t": 1712627481.837423}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.47126714411195, "units": "Tflops", "t": 1712627481.9184797}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.50083412874136, "units": "Tflops", "t": 1712627481.9995215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4840520768818, "units": "Tflops", "t": 1712627482.0805683}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4968382139206, "units": "Tflops", "t": 1712627482.1616201}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4153472185618, "units": "Tflops", "t": 1712627482.2426865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 264.0636050930978, "units": "Tflops", "t": 1712627482.3260207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 259.7064879473449, "units": "Tflops", "t": 1712627482.41074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.23134714771237, "units": "Tflops", "t": 1712627482.4933865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.2327838855913, "units": "Tflops", "t": 1712627482.575416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.4536658551376, "units": "Tflops", "t": 1712627482.6570807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.44010891465933, "units": "Tflops", "t": 1712627482.738149}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.306768625071, "units": "Tflops", "t": 1712627482.8192906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 305.883}}, "t": 1712627482.744733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.2588939169868, "units": "Tflops", "t": 1712627482.900831}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.5168189643382, "units": "Tflops", "t": 1712627482.981879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.50642860710354, "units": "Tflops", "t": 1712627483.0629232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.5000349363673, "units": "Tflops", "t": 1712627483.1439655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.4904303347276, "units": "Tflops", "t": 1712627483.2253141}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.68463288777133, "units": "Tflops", "t": 1712627483.3087673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 305.7}}, "t": 1712627483.2711635}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 264.2693999912547, "units": "Tflops", "t": 1712627483.3921154}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.1672739468332, "units": "Tflops", "t": 1712627483.4741676}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.40841472432066, "units": "Tflops", "t": 1712627483.5561426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.4021661289366, "units": "Tflops", "t": 1712627483.6381187}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.08262475106636, "units": "Tflops", "t": 1712627483.71989}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.41774331428735, "units": "Tflops", "t": 1712627483.800981}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.52321342561675, "units": "Tflops", "t": 1712627483.8820176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 298.324}}, "t": 1712627483.8011818}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4840520768818, "units": "Tflops", "t": 1712627483.9631352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.503231734094, "units": "Tflops", "t": 1712627484.0441847}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.7039850214775, "units": "Tflops", "t": 1712627484.1254706}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.9189456176831, "units": "Tflops", "t": 1712627484.2082121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 264.02051957116004, "units": "Tflops", "t": 1712627484.291549}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.5244577359129, "units": "Tflops", "t": 1712627484.3741024}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.2382635850158, "units": "Tflops", "t": 1712627484.4567437}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.882988877765, "units": "Tflops", "t": 1712627484.5388777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.11537111456977, "units": "Tflops", "t": 1712627484.6200342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 291.984}}, "t": 1712627484.5861502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.228908517735, "units": "Tflops", "t": 1712627484.7015078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.8081474714394, "units": "Tflops", "t": 1712627484.7833605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.2373629304844, "units": "Tflops", "t": 1712627484.865095}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.41135381969735, "units": "Tflops", "t": 1712627484.9462285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.55359123028205, "units": "Tflops", "t": 1712627485.0272567}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.8900282202622, "units": "Tflops", "t": 1712627485.1084821}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.03553064649464, "units": "Tflops", "t": 1712627485.1911983}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 306.391}}, "t": 1712627485.1159792}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.67077615064466, "units": "Tflops", "t": 1712627485.2741256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.776540255074, "units": "Tflops", "t": 1712627485.3566034}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.17986940757027, "units": "Tflops", "t": 1712627485.4392638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.08231468841086, "units": "Tflops", "t": 1712627485.5213406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4353159757144, "units": "Tflops", "t": 1712627485.6024098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.76338633585766, "units": "Tflops", "t": 1712627485.683982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.41700701804535, "units": "Tflops", "t": 1712627485.765954}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.47334699260756, "units": "Tflops", "t": 1712627485.8476038}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.48594442948814, "units": "Tflops", "t": 1712627485.9292486}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.4222274074692, "units": "Tflops", "t": 1712627486.010621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.0038472254232, "units": "Tflops", "t": 1712627486.0918105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.67152972200125, "units": "Tflops", "t": 1712627486.174029}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.51738627052873, "units": "Tflops", "t": 1712627486.2575338}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.1033546261377, "units": "Tflops", "t": 1712627486.339618}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3724892735286, "units": "Tflops", "t": 1712627486.4216082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 298.128}}, "t": 1712627486.4083786}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3779555695644, "units": "Tflops", "t": 1712627486.5036669}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.17528919503917, "units": "Tflops", "t": 1712627486.5854185}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.84625664801376, "units": "Tflops", "t": 1712627486.6669726}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.276694781637, "units": "Tflops", "t": 1712627486.7496114}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.6334714591363, "units": "Tflops", "t": 1712627486.8318245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.1766648267423, "units": "Tflops", "t": 1712627486.9134367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.5248120879977, "units": "Tflops", "t": 1712627486.994478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 298.813}}, "t": 1712627486.9297602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.1974794575321, "units": "Tflops", "t": 1712627487.0756886}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.53370004984197, "units": "Tflops", "t": 1712627487.1582491}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.41821717725287, "units": "Tflops", "t": 1712627487.240837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.03219244924844, "units": "Tflops", "t": 1712627487.3232338}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.6552177126102, "units": "Tflops", "t": 1712627487.4054382}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.9711978506145, "units": "Tflops", "t": 1712627487.4872413}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.96159372626124, "units": "Tflops", "t": 1712627487.5693529}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.57851056849927, "units": "Tflops", "t": 1712627487.6506689}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.38498399167713, "units": "Tflops", "t": 1712627487.7326488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.32954755158147, "units": "Tflops", "t": 1712627487.8146572}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3389154824633, "units": "Tflops", "t": 1712627487.8966548}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.7270277835438, "units": "Tflops", "t": 1712627487.9779294}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.4348122944528, "units": "Tflops", "t": 1712627488.0602221}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.19062430496183, "units": "Tflops", "t": 1712627488.1428876}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.30615178872495, "units": "Tflops", "t": 1712627488.2252119}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3381347965733, "units": "Tflops", "t": 1712627488.307217}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 299.295}}, "t": 1712627488.231188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3599957187142, "units": "Tflops", "t": 1712627488.389261}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.349845561911, "units": "Tflops", "t": 1712627488.471256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3982609045607, "units": "Tflops", "t": 1712627488.5532508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.3646806654517, "units": "Tflops", "t": 1712627488.6352386}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.32174144214645, "units": "Tflops", "t": 1712627488.7172403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.33110882797234, "units": "Tflops", "t": 1712627488.799246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 299.581}}, "t": 1712627488.7549875}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712627490.0236285, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bf16.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bf16.D1.data new file mode 100644 index 000000000..9b8c04420 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/bf16.D1.data @@ -0,0 +1,106 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "bf16"}, "weight": 0.0, "name": "bf16", "tag": ["bf16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 65, "power": 92.42, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 102.651, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627479.685485, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712627479.6966667}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 185.61267091665124, "units": "Tflops", "t": 1712627481.5334263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [882.5, 81920.0], "load": 0, "temperature": 66, "power": 100.672}}, "t": 1712627481.0746977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.8542045824883, "units": "Tflops", "t": 1712627481.6161788}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 0, "temperature": 70, "power": 329.048}}, "t": 1712627481.5940304}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.7263941438459, "units": "Tflops", "t": 1712627481.6987343}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.7294795140119, "units": "Tflops", "t": 1712627481.7812343}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.67858003228974, "units": "Tflops", "t": 1712627481.8637533}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.6238462141339, "units": "Tflops", "t": 1712627481.9462779}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.7341077031015, "units": "Tflops", "t": 1712627482.0287702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.663930752133, "units": "Tflops", "t": 1712627482.111296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.68166429636454, "units": "Tflops", "t": 1712627482.1938114}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.57375748502955, "units": "Tflops", "t": 1712627482.2763605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.5329298325322, "units": "Tflops", "t": 1712627482.358914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.65390848773393, "units": "Tflops", "t": 1712627482.441439}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.63155384320095, "units": "Tflops", "t": 1712627482.52396}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.738736052807, "units": "Tflops", "t": 1712627482.6064508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.7148246414192, "units": "Tflops", "t": 1712627482.6889563}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.66161785501964, "units": "Tflops", "t": 1712627482.77147}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 72, "power": 297.094}}, "t": 1712627482.743899}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.26593292825214, "units": "Tflops", "t": 1712627482.8541787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.62692921228626, "units": "Tflops", "t": 1712627482.936893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.6485121958594, "units": "Tflops", "t": 1712627483.0194256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.6585340546125, "units": "Tflops", "t": 1712627483.10194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.69323092206196, "units": "Tflops", "t": 1712627483.1844473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.3324938583765, "units": "Tflops", "t": 1712627483.2667568}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.02700320104776, "units": "Tflops", "t": 1712627483.3488517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 72, "power": 297.592}}, "t": 1712627483.2708414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.98211618026505, "units": "Tflops", "t": 1712627483.4305067}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.75030762982044, "units": "Tflops", "t": 1712627483.5130055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.5923185811412, "units": "Tflops", "t": 1712627483.5952573}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.39279378134785, "units": "Tflops", "t": 1712627483.677239}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.2497968749576, "units": "Tflops", "t": 1712627483.760192}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.30599521126084, "units": "Tflops", "t": 1712627483.8437562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 289.701}}, "t": 1712627483.8022668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.05925380622335, "units": "Tflops", "t": 1712627483.9262123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.6151360986973, "units": "Tflops", "t": 1712627484.0078228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.5404843274174, "units": "Tflops", "t": 1712627484.089767}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 264.6523342827524, "units": "Tflops", "t": 1712627484.1729043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.7645642856874, "units": "Tflops", "t": 1712627484.2563362}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.5674776873093, "units": "Tflops", "t": 1712627484.3385684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.3968213112954, "units": "Tflops", "t": 1712627484.4208524}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.4521630397576, "units": "Tflops", "t": 1712627484.5028136}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.07065641987884, "units": "Tflops", "t": 1712627484.5842829}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.20042953744155, "units": "Tflops", "t": 1712627484.6660283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 306.531}}, "t": 1712627484.5866387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 264.7518510599055, "units": "Tflops", "t": 1712627484.7492266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.6695569242206, "units": "Tflops", "t": 1712627484.8326771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.1776150183154, "units": "Tflops", "t": 1712627484.9150321}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.0857648739458, "units": "Tflops", "t": 1712627484.9968293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.16181622962705, "units": "Tflops", "t": 1712627485.0788836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.2589509955013, "units": "Tflops", "t": 1712627485.1618402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 304.196}}, "t": 1712627485.1163003}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.04167021314487, "units": "Tflops", "t": 1712627485.2455401}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.16515120257526, "units": "Tflops", "t": 1712627485.3285196}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.24365047479245, "units": "Tflops", "t": 1712627485.410243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.67164069895557, "units": "Tflops", "t": 1712627485.492763}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.6451207983116, "units": "Tflops", "t": 1712627485.5749733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.8776612718864, "units": "Tflops", "t": 1712627485.6574175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.42085516872305, "units": "Tflops", "t": 1712627485.7396927}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.7526220657029, "units": "Tflops", "t": 1712627485.8221815}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.0909612296396, "units": "Tflops", "t": 1712627485.904698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.09478240100617, "units": "Tflops", "t": 1712627485.9868174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.95380901049566, "units": "Tflops", "t": 1712627486.068938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.8407704324542, "units": "Tflops", "t": 1712627486.1523457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.22233830385255, "units": "Tflops", "t": 1712627486.2353106}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.8629904374694, "units": "Tflops", "t": 1712627486.3181481}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.53177698138336, "units": "Tflops", "t": 1712627486.4004016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.75647954809045, "units": "Tflops", "t": 1712627486.4829001}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 299.34}}, "t": 1712627486.4092002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.44721985155945, "units": "Tflops", "t": 1712627486.5652502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.8915475640011, "units": "Tflops", "t": 1712627486.6473882}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.4474750381212, "units": "Tflops", "t": 1712627486.7293675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.751850582613, "units": "Tflops", "t": 1712627486.8118553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.59610243880286, "units": "Tflops", "t": 1712627486.8944}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.5537272624876, "units": "Tflops", "t": 1712627486.9772844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 299.82}}, "t": 1712627486.9311004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.7163671837523, "units": "Tflops", "t": 1712627487.0598848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.3924665555257, "units": "Tflops", "t": 1712627487.1434338}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.0889393820177, "units": "Tflops", "t": 1712627487.2270663}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.41700701804535, "units": "Tflops", "t": 1712627487.309058}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.14229962079327, "units": "Tflops", "t": 1712627487.3908088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.66686894501123, "units": "Tflops", "t": 1712627487.4730203}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 305.536}}, "t": 1712627487.4536877}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.75570804268773, "units": "Tflops", "t": 1712627487.5555556}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.06466673581485, "units": "Tflops", "t": 1712627487.6379452}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.125770728123, "units": "Tflops", "t": 1712627487.720315}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.6435674944946, "units": "Tflops", "t": 1712627487.802528}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.68860415138374, "units": "Tflops", "t": 1712627487.885048}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.68860415138374, "units": "Tflops", "t": 1712627487.967551}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 264.2587997769456, "units": "Tflops", "t": 1712627488.0508132}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 263.0619263483348, "units": "Tflops", "t": 1712627488.1344535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 265.09655922554504, "units": "Tflops", "t": 1712627488.2174752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.349845561911, "units": "Tflops", "t": 1712627488.2994714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 293.494}}, "t": 1712627488.2407916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.28349807019293, "units": "Tflops", "t": 1712627488.3815424}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.68397754123924, "units": "Tflops", "t": 1712627488.4640505}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.4518729822965, "units": "Tflops", "t": 1712627488.546326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.6994001993666, "units": "Tflops", "t": 1712627488.6288354}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.63735445951727, "units": "Tflops", "t": 1712627488.7110462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 266.82438950377167, "units": "Tflops", "t": 1712627488.7935104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 304.884}}, "t": 1712627488.7603407}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.3797676459695, "units": "Tflops", "t": 1712627488.8758533}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "bf16"], "time": 1712627489.957692, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp16.D0.data new file mode 100644 index 000000000..51e0735e6 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp16.D0.data @@ -0,0 +1,294 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 60, "power": 89.538, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.03, "memory": 0.010771942138671876}, "temperature": 62, "power": 96.939, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627844.893094, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627847.9334295}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 56, "power": 83.2}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 55, "power": 82.665}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 54, "power": 82.099}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 54, "power": 82.212}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 1.0, "temperature": 58, "power": 303.08}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 1.0, "temperature": 57, "power": 336.428}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 324.46551203896115, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 329.6738570787095, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 333.19724308436105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 60, "power": 304.242}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 307.26176502257664, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.0348104056703, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.39346646144406, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0, "temperature": 57, "power": 84.545}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 245.95539042520568, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 225.35713520418054, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.1557971729761, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.4982312620455, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 59, "power": 265.986}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 310.72487214709287, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.8072200147236, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.2515617073891, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 61, "power": 270.163}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.18368435321, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.5044200250814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.98892260245583, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.07823853032966, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.87, "temperature": 62, "power": 275.994}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 344.7615149659169, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 198.13938004629125, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 325.99144540210256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 62, "power": 329.832}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 340.3248306121228, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.592543470218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.269974995314, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.07454674884065, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.91, "temperature": 63, "power": 286.925}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.4312395149763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.8802293025532, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.2439324164926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 63, "power": 137.362}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.4039690809012, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.9650814119537, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 279.35553638041694, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 220.40770849167137, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 63, "power": 268.217}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 64, "power": 302.396}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627903.8325028, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp16.D1.data new file mode 100644 index 000000000..72109217a --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp16.D1.data @@ -0,0 +1,294 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp16", "tag": ["convnext_large-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 58, "power": 85.332, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 60, "power": 91.519, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627847.915492, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627847.9414766}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 57, "power": 88.966}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 57, "power": 88.172}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 56, "power": 87.476}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 55, "power": 86.863}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.96, "temperature": 60, "power": 118.008}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 61, "power": 292.335}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 321.2665712361607, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 328.3334170512294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 341.02501897947434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.88, "temperature": 63, "power": 271.873}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.97372926698296, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.9937789942653, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.3155072831601, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0, "temperature": 58, "power": 89.857}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 262.5560427668102, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 230.22428131728458, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.5544153619531, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.3922208704181, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 63, "power": 303.606}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 308.3200148788816, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.35614847058247, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.01340686507785, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 64, "power": 296.597}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.78715552482834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.9714743746554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 292.61538865502445, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 331.42602746748616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 64, "power": 297.461}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 345.9823741495939, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.58599033095203, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 309.7694238714971, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 64, "power": 339.495}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.63405305776354, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.1209158712424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.7087389841391, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.4746131849806, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 64, "power": 312.286}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 310.396277357953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.2577853262366, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.9801050626288, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 66, "power": 306.435}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.40240661754444, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.3136123193163, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 293.2446243907499, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 65, "power": 289.577}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 224.29612087890374, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 66, "power": 300.265}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627904.4642608, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp32.D0.data new file mode 100644 index 000000000..304628b58 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp32.D0.data @@ -0,0 +1,171 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 50, "power": 79.009, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 52, "power": 87.898, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627748.53598, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627751.3602233}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 49, "power": 78.664}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 49, "power": 78.345}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 49, "power": 77.838}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [902.0625, 81920.0], "load": 0, "temperature": 48, "power": 77.453}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214483737945557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [46053.25, 81920.0], "load": 1.0, "temperature": 55, "power": 308.029}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33617639541626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254825592041016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179396152496338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 56, "power": 300.352}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268494129180908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 57, "power": 298.817}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018351554870605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 53.43045226349913, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164165019989014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 57, "power": 286.162}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.41472961790538, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133245468139648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.74010590789886, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162621021270752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 57, "power": 312.71}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.67653851820362, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066978931427002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.63753924389508, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074810981750488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 59, "power": 289.574}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.59607748356545, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0772786140441895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.56783168181842, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043660640716553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 59, "power": 268.083}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.650846330406814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142033576965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.699336294989216, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11406946182251}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.24, "temperature": 60, "power": 281.861}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.65911518156791, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.118796348571777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.55917088308685, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084875583648682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 61, "power": 285.578}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.53863887675524, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017924785614014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.54099896751932, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0746049880981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 62, "power": 285.778}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.470961114913564, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962323188781738}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.42568102042911, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078864097595215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 268.578}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.96945515501371, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047895908355713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 290.53}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.93813159073947, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076254367828369}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.34021614828534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083705902099609}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 64, "power": 336.507}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.31313289548287, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153024196624756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.323601638906744, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962716102600098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 64, "power": 297.148}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.288184106340495, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094931125640869}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.269972764812586, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002690315246582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 299.284}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.900058085133, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089021682739258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 290.261}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.89670291038278, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033406734466553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.24927032390966, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93844747543335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 64, "power": 281.769}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.232231667319326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112069606781006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.25775345838197, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 271.119}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875514507293701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 54.245876407105506, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840810298919678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 63.01611825485021, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.97, "temperature": 65, "power": 292.201}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938738822937012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.00621557505846, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930878162384033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 290.918}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.11071143897027, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 291.509}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627841.63542, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp32.D1.data new file mode 100644 index 000000000..5a41a2cfe --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-fp32.D1.data @@ -0,0 +1,171 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "fp32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-fp32", "tag": ["convnext_large-fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 50, "power": 78.85, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 52, "power": 83.639, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627751.343198, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627751.3683386}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 51, "power": 83.168}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 51, "power": 82.554}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 51, "power": 82.501}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 51, "power": 82.222}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214483737945557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [46053.25, 81920.0], "load": 1.0, "temperature": 57, "power": 283.234}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33617639541626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254825592041016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179396152496338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 59, "power": 273.475}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268494129180908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 60, "power": 304.176}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018351554870605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 53.251459059712275, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164165019989014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 58, "power": 302.915}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 30.717673126431016, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133245468139648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.86361090326053, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162621021270752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 61, "power": 267.04}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.60393811931762, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066978931427002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.50077169033001, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074810981750488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 62, "power": 327.103}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.39494382054885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0772786140441895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.3333091992681, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043660640716553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 62, "power": 302.741}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.37373407683214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142033576965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.6483284826448, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11406946182251}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 269.635}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.57561674830516, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.118796348571777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.19138346036181, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084875583648682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 288.798}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.176918245749725, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017924785614014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.229810830516534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0746049880981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 288.034}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.16202939307697, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962323188781738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 284.53}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.165092539227885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078864097595215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.20182278626143, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047895908355713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 66, "power": 97.06}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.134942770667955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076254367828369}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.060248287849525, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083705902099609}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 66, "power": 275.121}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.05753752327251, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153024196624756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.06379547834955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962716102600098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 66, "power": 276.992}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.06124994772288, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094931125640869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 67, "power": 289.392}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.043972436824376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002690315246582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.354225023959735, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089021682739258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 282.872}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.35291654509483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033406734466553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.02998484199433, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93844747543335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 67, "power": 339.205}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.992462149643146, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112069606781006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.9655157128374, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 67, "power": 308.284}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875514507293701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.1526140909153, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840810298919678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 67.42863596018421, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0.96, "temperature": 68, "power": 284.519}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938738822937012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 31.71342186941428, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930878162384033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 67, "power": 285.197}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.752496575283274, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 67, "power": 337.145}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "fp32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627841.7338178, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32-fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32-fp16.D0.data new file mode 100644 index 000000000..2c8d3205a --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32-fp16.D0.data @@ -0,0 +1,301 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 58, "power": 89.245, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.59, "memory": 0.010771942138671876}, "temperature": 60, "power": 94.36, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627974.923563, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627977.8950877}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 54, "power": 81.872}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 53, "power": 81.384}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 52, "power": 80.187}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 52, "power": 80.688}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 56, "power": 275.301}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 57, "power": 272.592}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 336.6938740130506, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.97833260263485, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.9071880403855, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.91, "temperature": 58, "power": 303.634}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.3877396735016, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 320.90788289831676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 348.3210296851482, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0, "temperature": 54, "power": 82.373}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 223.3846692439479, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.3998318570049, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.5840651577264, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 59, "power": 276.11}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.49566227440823, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.15278002739325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.6225612609089, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.88, "temperature": 60, "power": 257.56}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.27456498834636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.15443121639146, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.6676942277613, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.11013648255175, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 60, "power": 214.543}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 345.95393561183727, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 195.15185690288845, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 322.7789782554671, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.87, "temperature": 61, "power": 304.924}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 335.624974541338, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.99495847942916, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.39889873139754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 61, "power": 301.621}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.8120612665015, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.68199465617363, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.33029842836277, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 62, "power": 351.967}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.2413344387495, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.60357565142186, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.95602570435847, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 62, "power": 322.407}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 262.33329554230926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.99804793307356, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.843994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.841278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 62, "power": 284.745}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.87264030579666, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27475.25, 81920.0], "load": 0.98, "temperature": 62, "power": 315.891}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712628034.8195508, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32-fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32-fp16.D1.data new file mode 100644 index 000000000..ca022127c --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32-fp16.D1.data @@ -0,0 +1,292 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 3.0, "name": "convnext_large-tf32-fp16", "tag": ["convnext_large-tf32-fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 57, "power": 84.23, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 58, "power": 90.043, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627977.87774, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627977.9032245}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 56, "power": 87.364}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 55, "power": 86.388}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 54, "power": 85.595}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 53, "power": 85.275}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 58, "power": 342.526}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.9, "temperature": 59, "power": 314.124}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 340.4758127671689, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 323.0851288567108, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 348.9751613266505, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 60, "power": 302.627}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 310.61875864612927, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.5530418459217, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 319.67027715819285, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 288.11430358766233, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0, "temperature": 56, "power": 87.768}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 223.66603241134575, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.5492909342646, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.1954116463869, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 61, "power": 146.88}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.8965392290568, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.16091003069346, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.90548711593135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0, "temperature": 58, "power": 89.666}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 210.68296862567004, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.3629218058495, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.99469664487935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.0784631158193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 62, "power": 302.23}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 344.57387079719234, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 124.20682094236108, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 295.2452237553526, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 63, "power": 253.624}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.5291960970114, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.60650020533603, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.89, "temperature": 63, "power": 305.991}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 298.25750926076745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 328.8631992072026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 339.35235221307465, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.40996608653745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.99, "temperature": 63, "power": 285.335}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.40548910893517, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.59887465772744, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.517291661673, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.88, "temperature": 63, "power": 248.611}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 335.4507416563655, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 187.73187164046527, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27475.25, 81920.0], "load": 0.94, "temperature": 62, "power": 288.554}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712628033.7665124, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32.D0.data new file mode 100644 index 000000000..e26248acd --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32.D0.data @@ -0,0 +1,197 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 1.0, "memory": 0.010771942138671876}, "temperature": 58, "power": 88.291, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 60, "power": 96.228, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627907.545486, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627910.6509728}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 55, "power": 82.372}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 54, "power": 81.774}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 53, "power": 81.676}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 53, "power": 79.992}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2144575119018555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.336220741271973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.35, "temperature": 53, "power": 80.533}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2547926902771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179419040679932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268465518951416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018303871154785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1641645431518555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 58, "power": 299.275}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133294105529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162642955780029}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 124.99732720558936, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066996097564697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 155.14866380116297, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074804782867432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.5638938050647, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077259063720703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.97, "temperature": 58, "power": 291.32}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0435991287231445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.3096615773383, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142058372497559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.008802479891, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.114046573638916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1187896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 59, "power": 294.484}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.76981544475044, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084855556488037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.19828690771624, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017906188964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074584484100342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.16747291958254, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962344169616699}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.538248820675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078824996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.99, "temperature": 60, "power": 318.642}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047876834869385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.17114550830217, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762128829956055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.99055342433375, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083765983581543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153017044067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.40314096958267, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.9, "temperature": 60, "power": 361.159}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962660312652588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.86014324203543, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094902038574219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002725601196289}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.28077277617825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089033126831055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.1563134767725, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033377170562744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 60, "power": 290.245}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938430309295654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.16680305735846, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112060070037842}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.23512440879502, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875522613525391}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0, "temperature": 57, "power": 84.253}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 68.53601499667813, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840785503387451}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938703536987305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.4238944210524, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9308247566223145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.09954194549488, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798532009124756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888091087341309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 61, "power": 285.966}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.58581961128064, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874434947967529}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.23995748174642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944365501403809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958274841308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.06876958920856, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953979015350342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.97, "temperature": 62, "power": 282.219}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.8399493718621, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947924613952637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85654878616333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.34042113455243, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895290374755859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.21790939060847, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912010192871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968919277191162}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 62, "power": 310.573}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.44045971086115, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984976768493652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.75258572810507, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878252029418945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996346950531006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.81905720739164, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023502349853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.87920163391587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968320369720459}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 0.99, "temperature": 63, "power": 314.96}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947745323181152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.87298177757424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 85.573}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627972.2524028, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32.D1.data new file mode 100644 index 000000000..016c38ec0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/convnext_large-tf32.D1.data @@ -0,0 +1,197 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "convnext_large", "--batch-size": 128}, "tags": ["classification", "convnet", "precision-showcase", "vision"], "weight": 0.0, "name": "convnext_large-tf32", "tag": ["convnext_large-tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 57, "power": 84.635, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 59, "power": 90.434, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627910.633941, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627910.6591394}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 56, "power": 87.964}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 56, "power": 86.975}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 55, "power": 86.584}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 54, "power": 88.172}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2144575119018555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.336220741271973}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0.8, "temperature": 56, "power": 90.225}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2547926902771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179419040679932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268465518951416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018303871154785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1641645431518555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0.97, "temperature": 60, "power": 298.561}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133294105529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162642955780029}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 124.93489842534105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066996097564697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.70656463235315, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074804782867432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.39077369790904, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077259063720703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 61, "power": 299.831}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0435991287231445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.49621274017517, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142058372497559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.01666569709296, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.114046573638916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1187896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0.9, "temperature": 62, "power": 317.23}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.38069470904628, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084855556488037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.10796640672858, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017906188964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074584484100342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.82934003081652, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962344169616699}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.40549743910533, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078824996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 62, "power": 279.563}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047876834869385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.10844139222917, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762128829956055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.8273309486668, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083765983581543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153017044067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.17313464525418, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 293.767}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962660312652588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.89225029683415, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094902038574219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002725601196289}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.91287373234314, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089033126831055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.88376912354815, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033377170562744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 63, "power": 288.908}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938430309295654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.52232766137513, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112060070037842}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 98.08132424647053, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875522613525391}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0, "temperature": 58, "power": 89.955}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 69.2388253054061, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840785503387451}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938703536987305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.00758206291934, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9308247566223145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.37622848604127, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798532009124756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888091087341309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0.89, "temperature": 63, "power": 308.404}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.95051404157365, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874434947967529}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.82035605094916, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944365501403809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958274841308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.5916326160772, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953979015350342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 64, "power": 284.906}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.55443470387803, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947924613952637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85654878616333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.24772523613183, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895290374755859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.18061962707763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912010192871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968919277191162}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0.99, "temperature": 64, "power": 276.382}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.7094329832393, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984976768493652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.22108380932517, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878252029418945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996346950531006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.32604739036393, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023502349853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.16184969059728, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968320369720459}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 1.0, "temperature": 65, "power": 303.038}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947745323181152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.5602811765769, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [49595.25, 81920.0], "load": 0.96, "temperature": 65, "power": 338.176}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D1-fb5679c624c0e6290d39628373b49ebc.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "convnext_large", "--batch-size", "128"], "time": 1712627972.118939, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0.data new file mode 100644 index 000000000..3777f04d3 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0.data @@ -0,0 +1,331 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "multigpu", "transformer", "vision"], "weight": 5.0, "name": "davit_large-multi", "tag": ["davit_large-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 62, "power": 92.092, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 64, "power": 100.869, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629029.731186, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712629029.7494504}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.01) calculated from base learning rate (0.01) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4313.25, 81920.0], "load": 0.09, "temperature": 57, "power": 86.223}, "1": {"memory": [4313.25, 81920.0], "load": 0.09, "temperature": 59, "power": 92.525}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.155410289764404}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py:251: UserWarning: Grad strides do not match bucket view strides. This may indicate grad was not created according to the gradient layout contract, or that the param's strides changed since DDP was constructed. This is not an error, but may impair performance.\n", "pipe": "stderr"} +{"event": "line", "data": "grad.sizes() = [1536, 1, 3, 3], strides() = [9, 1, 3, 1]\n", "pipe": "stderr"} +{"event": "line", "data": "bucket_view.sizes() = [1536, 1, 3, 3], strides() = [9, 9, 3, 1] (Triggered internally at ../torch/csrc/distributed/c10d/reducer.cpp:320.)\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/autograd/__init__.py:251: UserWarning: Grad strides do not match bucket view strides. This may indicate grad was not created according to the gradient layout contract, or that the param's strides changed since DDP was constructed. This is not an error, but may impair performance.\n", "pipe": "stderr"} +{"event": "line", "data": "grad.sizes() = [1536, 1, 3, 3], strides() = [9, 1, 3, 1]\n", "pipe": "stderr"} +{"event": "line", "data": "bucket_view.sizes() = [1536, 1, 3, 3], strides() = [9, 9, 3, 1] (Triggered internally at ../torch/csrc/distributed/c10d/reducer.cpp:320.)\n", "pipe": "stderr"} +{"event": "line", "data": " Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass\n", "pipe": "stderr"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 7.172 (7.17) Time: 4.187s, 61.14/s (4.187s, 61.14/s) LR: 1.000e-05 Data: 0.769 (0.769)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [17111.25, 81920.0], "load": 0.97, "temperature": 60, "power": 210.707}, "1": {"memory": [17111.25, 81920.0], "load": 0.97, "temperature": 62, "power": 228.456}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.168247222900391}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.25187873840332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33863.25, 81920.0], "load": 0.98, "temperature": 61, "power": 313.54}, "1": {"memory": [33863.25, 81920.0], "load": 1.0, "temperature": 64, "power": 272.284}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.134157180786133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.230111122131348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.218700408935547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33863.25, 81920.0], "load": 1.0, "temperature": 63, "power": 312.97}, "1": {"memory": [33863.25, 81920.0], "load": 1.0, "temperature": 65, "power": 270.4}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 580.5875591911001, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 7.249 (7.21) Time: 0.421s, 607.87/s (0.667s, 384.00/s) LR: 1.000e-05 Data: 0.000 (0.056)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.878 (0.878) Loss: 7.2354 (7.2354) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.327 (0.210) Loss: 7.0591 (7.2404) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0/20240409-021717-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 604.6975342144715, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34109.25, 81920.0], "load": 0.94, "temperature": 63, "power": 286.614}, "1": {"memory": [34109.25, 81920.0], "load": 0.93, "temperature": 65, "power": 320.669}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6795.25, 81920.0], "load": 0.03, "temperature": 58, "power": 84.797}, "1": {"memory": [33831.25, 81920.0], "load": 1.0, "temperature": 60, "power": 99.111}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.150633811950684}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 7.231 (7.23) Time: 0.973s, 263.00/s (0.973s, 263.00/s) LR: 2.008e-03 Data: 0.553 (0.553)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 500.44049710881455, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.119625091552734}, "pipe": "data"} +{"event": "data", "data": {"rate": 576.5714153534002, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33905.25, 81920.0], "load": 1.0, "temperature": 63, "power": 306.95}, "1": {"memory": [33905.25, 81920.0], "load": 1.0, "temperature": 65, "power": 300.835}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.041637420654297}, "pipe": "data"} +{"event": "data", "data": {"rate": 600.2782040923274, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 534.1704240148216, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061583995819092}, "pipe": "data"} +{"event": "data", "data": {"rate": 564.7736734658465, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003388404846191}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33905.25, 81920.0], "load": 0.99, "temperature": 63, "power": 304.932}, "1": {"memory": [33905.25, 81920.0], "load": 1.0, "temperature": 65, "power": 307.766}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 577.0608573553427, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.952075958251953}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.962 (7.10) Time: 0.421s, 607.49/s (0.462s, 554.65/s) LR: 2.008e-03 Data: 0.000 (0.041)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.628 (0.628) Loss: 6.8906 (6.8906) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.031 (0.178) Loss: 6.6548 (6.8795) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0/20240409-021717-davit_large-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 608.7403066389157, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34149.25, 81920.0], "load": 0.96, "temperature": 64, "power": 308.888}, "1": {"memory": [34149.25, 81920.0], "load": 0.93, "temperature": 65, "power": 301.977}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34149.25, 81920.0], "load": 0.03, "temperature": 59, "power": 85.581}, "1": {"memory": [34319.25, 81920.0], "load": 1.0, "temperature": 61, "power": 99.672}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909141540527344}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.912 (6.91) Time: 0.949s, 269.73/s (0.949s, 269.73/s) LR: 4.006e-03 Data: 0.527 (0.527)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.93126106262207}, "pipe": "data"} +{"event": "data", "data": {"rate": 512.3139019000888, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.873774528503418}, "pipe": "data"} +{"event": "data", "data": {"rate": 600.6726886512652, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34393.25, 81920.0], "load": 0.96, "temperature": 63, "power": 311.034}, "1": {"memory": [34393.25, 81920.0], "load": 0.96, "temperature": 65, "power": 318.287}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 535.4627762038576, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900935173034668}, "pipe": "data"} +{"event": "data", "data": {"rate": 568.4400330593177, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.050783157348633}, "pipe": "data"} +{"event": "data", "data": {"rate": 577.30698563485, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984735488891602}, "pipe": "data"} +{"event": "data", "data": {"rate": 568.1526917816167, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34393.25, 81920.0], "load": 0.95, "temperature": 64, "power": 306.211}, "1": {"memory": [34393.25, 81920.0], "load": 0.95, "temperature": 66, "power": 311.432}}}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.990 (6.95) Time: 0.422s, 607.32/s (0.501s, 511.13/s) LR: 4.006e-03 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.618 (0.618) Loss: 6.8478 (6.8478) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.025 (0.203) Loss: 6.3434 (6.8139) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 607.9394808785537, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34637.25, 81920.0], "load": 0.97, "temperature": 64, "power": 306.774}, "1": {"memory": [34637.25, 81920.0], "load": 0.96, "temperature": 66, "power": 319.879}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34657.25, 81920.0], "load": 0, "temperature": 58, "power": 85.483}, "1": {"memory": [34807.25, 81920.0], "load": 1.0, "temperature": 61, "power": 99.574}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.778879165649414}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.788 (6.79) Time: 1.009s, 253.63/s (1.009s, 253.63/s) LR: 6.004e-03 Data: 0.588 (0.588)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.850714683532715}, "pipe": "data"} +{"event": "data", "data": {"rate": 601.6528088920498, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 533.1166924800658, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922903060913086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34881.25, 81920.0], "load": 0.98, "temperature": 63, "power": 277.171}, "1": {"memory": [34881.25, 81920.0], "load": 1.0, "temperature": 65, "power": 252.656}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 567.0580361682862, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018014907836914}, "pipe": "data"} +{"event": "data", "data": {"rate": 577.4048690630932, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.937933921813965}, "pipe": "data"} +{"event": "data", "data": {"rate": 568.6636096406875, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34881.25, 81920.0], "load": 0.96, "temperature": 64, "power": 267.873}, "1": {"memory": [34881.25, 81920.0], "load": 1.0, "temperature": 65, "power": 264.47}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017698287963867}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.018 (6.90) Time: 0.422s, 606.96/s (0.464s, 552.18/s) LR: 6.004e-03 Data: 0.000 (0.043)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 582.2980403992284, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.752 (0.752) Loss: 6.7895 (6.7895) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.025 (0.185) Loss: 6.3929 (6.8059) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 1.2597)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 606.7864973924488, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35125.25, 81920.0], "load": 0.96, "temperature": 64, "power": 313.839}, "1": {"memory": [35125.25, 81920.0], "load": 0.97, "temperature": 67, "power": 323.154}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35125.25, 81920.0], "load": 0, "temperature": 59, "power": 86.277}, "1": {"memory": [35295.25, 81920.0], "load": 1.0, "temperature": 62, "power": 100.381}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904083251953125}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.853 (6.85) Time: 0.953s, 268.66/s (0.953s, 268.66/s) LR: 8.002e-03 Data: 0.530 (0.530)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 529.3657743513127, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8155198097229}, "pipe": "data"} +{"event": "data", "data": {"rate": 568.2051201069911, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930899620056152}, "pipe": "data"} +{"event": "data", "data": {"rate": 570.9823716463858, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35369.25, 81920.0], "load": 0.95, "temperature": 64, "power": 259.526}, "1": {"memory": [35369.25, 81920.0], "load": 1.0, "temperature": 66, "power": 273.911}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.988905906677246}, "pipe": "data"} +{"event": "data", "data": {"rate": 568.5632701771107, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049200534820557}, "pipe": "data"} +{"event": "data", "data": {"rate": 578.0477615078985, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023489952087402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35369.25, 81920.0], "load": 1.0, "temperature": 64, "power": 254.918}, "1": {"memory": [35369.25, 81920.0], "load": 1.0, "temperature": 66, "power": 261.669}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 602.52321187331, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.023 (6.94) Time: 0.424s, 603.44/s (0.461s, 555.48/s) LR: 8.002e-03 Data: 0.000 (0.040)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.616 (0.616) Loss: 6.8027 (6.8027) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.028 (0.180) Loss: 6.4915 (6.8067) Acc@1: 0.0000 ( 0.3634) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0/20240409-021717-davit_large-224/checkpoint-4.pth.tar', 0.3633720930232558)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 602.207585268895, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35613.25, 81920.0], "load": 0.96, "temperature": 65, "power": 246.788}, "1": {"memory": [35613.25, 81920.0], "load": 0.97, "temperature": 67, "power": 271.428}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35613.25, 81920.0], "load": 0, "temperature": 59, "power": 85.972}, "1": {"memory": [35783.25, 81920.0], "load": 1.0, "temperature": 62, "power": 100.381}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.792165756225586}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.813 (6.81) Time: 0.941s, 271.93/s (0.941s, 271.93/s) LR: 9.993e-03 Data: 0.520 (0.520)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 605.3688732345543, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.850456237792969}, "pipe": "data"} +{"event": "data", "data": {"rate": 547.619685311781, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.948673248291016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35857.25, 81920.0], "load": 1.0, "temperature": 63, "power": 247.934}, "1": {"memory": [35857.25, 81920.0], "load": 0.96, "temperature": 66, "power": 310.844}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 563.4857356150329, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.892074108123779}, "pipe": "data"} +{"event": "data", "data": {"rate": 600.2464100612378, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 533.3175199133289, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052328586578369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35857.25, 81920.0], "load": 0.96, "temperature": 65, "power": 332.208}, "1": {"memory": [35857.25, 81920.0], "load": 0.96, "temperature": 67, "power": 308.207}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 566.738506675914, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019845008850098}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.022 (6.92) Time: 0.422s, 606.26/s (0.480s, 533.56/s) LR: 9.993e-03 Data: 0.000 (0.059)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 579.6797439016556, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.623 (0.623) Loss: 6.8399 (6.8399) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.026 (0.204) Loss: 6.4452 (6.8113) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 605.4440493036456, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36101.25, 81920.0], "load": 0.92, "temperature": 64, "power": 321.016}, "1": {"memory": [36101.25, 81920.0], "load": 0.92, "temperature": 66, "power": 325.75}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36101.25, 81920.0], "load": 0.03, "temperature": 60, "power": 87.24}, "1": {"memory": [36271.25, 81920.0], "load": 1.0, "temperature": 63, "power": 101.638}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.836991310119629}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.863 (6.86) Time: 1.040s, 246.26/s (1.040s, 246.26/s) LR: 9.990e-03 Data: 0.617 (0.617)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 533.855660492057, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84394645690918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36345.25, 81920.0], "load": 0.93, "temperature": 64, "power": 306.799}, "1": {"memory": [36345.25, 81920.0], "load": 0.97, "temperature": 66, "power": 316.679}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 567.9447148770887, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.880205154418945}, "pipe": "data"} +{"event": "data", "data": {"rate": 573.067407342561, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029755592346191}, "pipe": "data"} +{"event": "data", "data": {"rate": 568.7672304209201, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.075010299682617}, "pipe": "data"} +{"event": "data", "data": {"rate": 580.1615200492898, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36345.25, 81920.0], "load": 1.0, "temperature": 65, "power": 306.286}, "1": {"memory": [36345.25, 81920.0], "load": 0.96, "temperature": 67, "power": 315.863}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.027011394500732}, "pipe": "data"} +{"event": "data", "data": {"rate": 605.4076052575675, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.038 (6.95) Time: 0.421s, 607.40/s (0.465s, 550.17/s) LR: 9.990e-03 Data: 0.000 (0.045)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.751 (0.751) Loss: 6.8414 (6.8414) Acc@1: 0.0000 ( 0.0000) Acc@5: 3.5156 ( 3.5156)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.025 (0.185) Loss: 6.4465 (6.8038) Acc@1: 0.0000 ( 0.2665) Acc@5: 3.1250 ( 1.1143)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 606.1318093285486, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36589.25, 81920.0], "load": 0.96, "temperature": 65, "power": 309.379}, "1": {"memory": [36589.25, 81920.0], "load": 0.97, "temperature": 67, "power": 299.034}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36589.25, 81920.0], "load": 0, "temperature": 60, "power": 86.569}, "1": {"memory": [36759.25, 81920.0], "load": 1.0, "temperature": 62, "power": 101.163}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.814316749572754}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.842 (6.84) Time: 0.964s, 265.49/s (0.964s, 265.49/s) LR: 9.987e-03 Data: 0.542 (0.542)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 532.8628870075561, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.831911563873291}, "pipe": "data"} +{"event": "data", "data": {"rate": 528.1481769884617, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36833.25, 81920.0], "load": 1.0, "temperature": 64, "power": 304.055}, "1": {"memory": [36833.25, 81920.0], "load": 1.0, "temperature": 67, "power": 311.364}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872281074523926}, "pipe": "data"} +{"event": "data", "data": {"rate": 568.1696366328794, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.004095554351807}, "pipe": "data"} +{"event": "data", "data": {"rate": 577.1176269514725, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944265365600586}, "pipe": "data"} +{"event": "data", "data": {"rate": 597.6134014674551, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36833.25, 81920.0], "load": 0.98, "temperature": 65, "power": 304.897}, "1": {"memory": [36833.25, 81920.0], "load": 1.0, "temperature": 67, "power": 311.226}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 534.4672886399601, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032905101776123}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.024 (6.93) Time: 0.423s, 605.76/s (0.462s, 554.46/s) LR: 9.987e-03 Data: 0.000 (0.041)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.622 (0.622) Loss: 6.7570 (6.7570) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.025 (0.177) Loss: 6.3381 (6.8011) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1628)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 606.3697735374108, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37077.25, 81920.0], "load": 0.96, "temperature": 65, "power": 319.208}, "1": {"memory": [37077.25, 81920.0], "load": 0.92, "temperature": 68, "power": 298.204}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37077.25, 81920.0], "load": 0.03, "temperature": 60, "power": 86.863}, "1": {"memory": [37247.25, 81920.0], "load": 1.0, "temperature": 63, "power": 101.554}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.788442134857178}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.829 (6.83) Time: 0.961s, 266.52/s (0.961s, 266.52/s) LR: 9.982e-03 Data: 0.538 (0.538)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 448.5085470223982, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840719223022461}, "pipe": "data"} +{"event": "data", "data": {"rate": 567.3393271718736, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81549596786499}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37321.25, 81920.0], "load": 0.96, "temperature": 65, "power": 306.584}, "1": {"memory": [37321.25, 81920.0], "load": 0.96, "temperature": 67, "power": 311.529}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 569.033304483493, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996615409851074}, "pipe": "data"} +{"event": "data", "data": {"rate": 567.7724891988271, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973349094390869}, "pipe": "data"} +{"event": "data", "data": {"rate": 574.2030165867626, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37321.25, 81920.0], "load": 0.96, "temperature": 65, "power": 306.449}, "1": {"memory": [37321.25, 81920.0], "load": 0.96, "temperature": 68, "power": 309.251}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076796531677246}, "pipe": "data"} +{"event": "data", "data": {"rate": 600.754632769218, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.013 (6.92) Time: 0.422s, 606.75/s (0.473s, 541.42/s) LR: 9.982e-03 Data: 0.000 (0.052)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.619 (0.619) Loss: 6.8363 (6.8363) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.025 (0.177) Loss: 6.4406 (6.7999) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1870)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 606.1186285743506, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37565.25, 81920.0], "load": 0.96, "temperature": 65, "power": 321.786}, "1": {"memory": [37565.25, 81920.0], "load": 0.97, "temperature": 68, "power": 334.731}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37565.25, 81920.0], "load": 0.03, "temperature": 60, "power": 87.449}, "1": {"memory": [37735.25, 81920.0], "load": 1.0, "temperature": 63, "power": 101.456}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8690385818481445}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.856 (6.86) Time: 0.949s, 269.75/s (0.949s, 269.75/s) LR: 9.978e-03 Data: 0.527 (0.527)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 605.6076705926903, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.855340957641602}, "pipe": "data"} +{"event": "data", "data": {"rate": 584.6671098405295, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "line", "data": "[2024-04-09 02:19:21,496] torch.distributed.elastic.agent.server.api: [WARNING] Received Signals.SIGTERM death signal, shutting down workers\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-09 02:19:21,496] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 69711 closing signal SIGTERM\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-09 02:19:21,496] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 69712 closing signal SIGTERM\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"} +{"event": "line", "data": " run(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"} +{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 255, in launch_agent\n", "pipe": "stderr"} +{"event": "line", "data": " result = agent.run()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/metrics/api.py\", line 124, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " result = f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/agent/server/api.py\", line 736, in run\n", "pipe": "stderr"} +{"event": "line", "data": " result = self._invoke_run(role)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/agent/server/api.py\", line 877, in _invoke_run\n", "pipe": "stderr"} +{"event": "line", "data": " time.sleep(monitor_interval)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py\", line 62, in _terminate_process_handler\n", "pipe": "stderr"} +{"event": "line", "data": " raise SignalException(f\"Process {os.getpid()} got signal: {sigval}\", sigval=sigval)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.distributed.elastic.multiprocessing.api.SignalException: Process 69700 got signal: 15\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1712629162.4688249, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large.D0.data new file mode 100644 index 000000000..0028b4802 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large.D0.data @@ -0,0 +1,251 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.99, "memory": 0.010771942138671876}, "temperature": 55, "power": 85.616, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.44, "memory": 0.010771942138671876}, "temperature": 58, "power": 92.181, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628911.545401, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712628914.6163864}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2151.25, 81920.0], "load": 0, "temperature": 51, "power": 79.38}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2242937088012695}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 4.222s, 30.32/s (4.222s, 30.32/s) LR: 1.000e-05 Data: 0.788 (0.788)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [13765.25, 81920.0], "load": 0, "temperature": 52, "power": 80.175}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.176398277282715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255929470062256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.25603723526001}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.283304214477539}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32475.25, 81920.0], "load": 0.98, "temperature": 57, "power": 301.439}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.325239181518555}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.48046209471994, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.229712009429932}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.7974288625414, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.304784774780273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32475.25, 81920.0], "load": 1.0, "temperature": 58, "power": 184.112}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.4042804258502, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.174015045166016}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.4861389508042, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.312165260314941}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.62041345416065, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.123180389404297}, "pipe": "data"} +{"event": "data", "data": {"rate": 310.2790185933143, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32475.25, 81920.0], "load": 0.99, "temperature": 60, "power": 331.866}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.7921957835296, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.223388671875}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.414s, 309.09/s (0.546s, 234.25/s) LR: 1.000e-05 Data: 0.000 (0.032)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.767 (0.767) Loss: 7.1174 (7.1174) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.362 (0.183) Loss: 7.0508 (7.2335) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D0/20240409-021521-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.89423310631093, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32719.25, 81920.0], "load": 0.86, "temperature": 60, "power": 308.572}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32719.25, 81920.0], "load": 0.95, "temperature": 61, "power": 332.918}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4639.25, 81920.0], "load": 0, "temperature": 56, "power": 83.798}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413697719573975}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.996s, 128.50/s (0.996s, 128.50/s) LR: 1.008e-03 Data: 0.582 (0.582)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 266.2340922867909, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087247848510742}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.94593008016886, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.4432097456956, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015710353851318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32549.25, 81920.0], "load": 0.98, "temperature": 62, "power": 299.177}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.6271893266286, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.373815463532, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.004858016967773}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.9053100123746, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.067661285400391}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32549.25, 81920.0], "load": 0.98, "temperature": 61, "power": 339.304}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.88238622769444, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943252086639404}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.66644552608534, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032105445861816}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.8665874871487, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32549.25, 81920.0], "load": 1.0, "temperature": 63, "power": 323.992}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978611946105957}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.96779182231916, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.86734386585374, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981616973876953}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.4330053532524, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01680326461792}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.415s, 308.19/s (0.439s, 291.37/s) LR: 1.008e-03 Data: 0.000 (0.025)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 276.51331294137657, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.619 (0.619) Loss: 6.8692 (6.8692) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.168) Loss: 6.7062 (6.8682) Acc@1: 0.0000 ( 0.2907) Acc@5: 3.1250 ( 1.2839)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D0/20240409-021521-davit_large-224/checkpoint-1.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32549.25, 81920.0], "load": 0.98, "temperature": 63, "power": 318.622}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.2000250041717, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32793.25, 81920.0], "load": 0.93, "temperature": 65, "power": 324.998}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32793.25, 81920.0], "load": 0.03, "temperature": 60, "power": 90.221}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885842323303223}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 1.324s, 96.66/s (1.324s, 96.66/s) LR: 2.006e-03 Data: 0.910 (0.910)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 231.83814939747066, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914949417114258}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.7849126503462, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33037.25, 81920.0], "load": 0.94, "temperature": 64, "power": 312.804}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931748867034912}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.47606544151915, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.940550327301025}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.396931874248, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.61801060627846, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33037.25, 81920.0], "load": 0.99, "temperature": 64, "power": 315.48}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992883205413818}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.13405420334783, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91033935546875}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.43373685631013, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011209487915039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33037.25, 81920.0], "load": 0.98, "temperature": 64, "power": 270.423}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.2241611003461, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89418888092041}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.5073394528733, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.969819068908691}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.8290910059832, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.56461653289443, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.095127105712891}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33037.25, 81920.0], "load": 1.0, "temperature": 65, "power": 304.241}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.6442421458965, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.027336120605469}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.948 (6.96) Time: 0.416s, 307.92/s (0.451s, 284.10/s) LR: 2.006e-03 Data: 0.000 (0.035)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 277.3183302730064, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.620 (0.620) Loss: 6.7616 (6.7616) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.177) Loss: 6.4838 (6.8220) Acc@1: 3.1250 ( 0.2665) Acc@5: 6.2500 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.83844659486596, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33281.25, 81920.0], "load": 0.96, "temperature": 65, "power": 316.915}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33281.25, 81920.0], "load": 0.93, "temperature": 66, "power": 309.38}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33301.25, 81920.0], "load": 0, "temperature": 60, "power": 86.472}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.863471984863281}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.863 (6.86) Time: 1.389s, 92.13/s (1.389s, 92.13/s) LR: 3.004e-03 Data: 0.974 (0.974)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 222.47639532397997, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.857139587402344}, "pipe": "data"} +{"event": "data", "data": {"rate": 301.1853363034171, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.905974388122559}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.8297306046767, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33525.25, 81920.0], "load": 0.99, "temperature": 64, "power": 289.658}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89678955078125}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.19583077298, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.3755718935443, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914050102233887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33525.25, 81920.0], "load": 1.0, "temperature": 64, "power": 330.536}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.4330978729962, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9702558517456055}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.7857657604657, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908979415893555}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.3462517186264, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006004810333252}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.57352794140127, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33525.25, 81920.0], "load": 0.99, "temperature": 65, "power": 266.036}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034702301025391}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.5805582302188, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.224388800086, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979143142700195}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.1012373644309, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01246976852417}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33525.25, 81920.0], "load": 0.96, "temperature": 66, "power": 272.454}}}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 6.999 (6.96) Time: 0.416s, 307.58/s (0.452s, 282.92/s) LR: 3.004e-03 Data: 0.000 (0.037)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 277.59290439745695, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.618 (0.618) Loss: 6.8222 (6.8222) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.5625 ( 1.5625)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.168) Loss: 6.3795 (6.8159) Acc@1: 0.0000 ( 0.1696) Acc@5: 6.2500 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.54321650738024, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33769.25, 81920.0], "load": 0.95, "temperature": 66, "power": 288.382}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33769.25, 81920.0], "load": 0.97, "temperature": 66, "power": 288.961}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33789.25, 81920.0], "load": 0, "temperature": 60, "power": 86.904}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859582901000977}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.860 (6.86) Time: 1.288s, 99.42/s (1.288s, 99.42/s) LR: 4.002e-03 Data: 0.873 (0.873)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 278.7457732885539, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827597618103027}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.0314119535267, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.876450061798096}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.09899800122423, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914443016052246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34013.25, 81920.0], "load": 0.99, "temperature": 64, "power": 278.24}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.7383011899085, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.99410843921095, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918074607849121}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.88014455025785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9963908195495605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34013.25, 81920.0], "load": 0.96, "temperature": 65, "power": 341.882}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.71122402730265, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.041259288787842}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.4416146532077, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.073038101196289}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.12309958584626, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34013.25, 81920.0], "load": 1.0, "temperature": 66, "power": 298.87}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011545658111572}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.59226692193585, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.17473466663176, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D0", "--checkpoint-hist", "1"], "time": 1712629026.488132, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large.D1.data new file mode 100644 index 000000000..7599d9dd9 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/davit_large.D1.data @@ -0,0 +1,252 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "davit_large", "--batch-size": 128, "--lr-base": 0.01}, "tags": ["classification", "transformer", "vision"], "weight": 1.0, "name": "davit_large", "tag": ["davit_large", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 54, "power": 81.749, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 56, "power": 87.658, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628914.597912, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712628914.6243284}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2151.25, 81920.0], "load": 0, "temperature": 53, "power": 85.371}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2242937088012695}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 4.231s, 30.25/s (4.231s, 30.25/s) LR: 1.000e-05 Data: 0.816 (0.816)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13249.25, 81920.0], "load": 0.7, "temperature": 55, "power": 105.576}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.176398277282715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255929470062256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.163320541381836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32475.25, 81920.0], "load": 0.96, "temperature": 60, "power": 302.911}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.234607696533203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.243466377258301}, "pipe": "data"} +{"event": "data", "data": {"rate": 290.9950713550947, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0485429763793945}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.9448714576353, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32475.25, 81920.0], "load": 0.96, "temperature": 62, "power": 303.886}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.246738433837891}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.38623132284584, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.151193141937256}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.23111988281926, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.270837306976318}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.32865304474086, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 278.50423655694675, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.312950611114502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32475.25, 81920.0], "load": 0.95, "temperature": 62, "power": 307.777}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.44540717065206, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311519622802734}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.312 (7.24) Time: 0.413s, 309.71/s (0.541s, 236.64/s) LR: 1.000e-05 Data: 0.001 (0.033)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.770 (0.770) Loss: 7.1174 (7.1174) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.533 (0.187) Loss: 7.0505 (7.2336) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D1/20240409-021521-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.72179849215235, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32719.25, 81920.0], "load": 0.97, "temperature": 63, "power": 301.579}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32719.25, 81920.0], "load": 0.95, "temperature": 64, "power": 322.359}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4639.25, 81920.0], "load": 0, "temperature": 58, "power": 90.569}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413557052612305}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 1.039s, 123.20/s (1.039s, 123.20/s) LR: 1.008e-03 Data: 0.624 (0.624)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 298.5564299793937, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087121963500977}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.93684969472173, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022743225097656}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.40095939766326, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015771865844727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32549.25, 81920.0], "load": 0.98, "temperature": 64, "power": 341.201}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.2552334752563, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.9483741984131, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00472354888916}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.01393680959234, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.067629814147949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32549.25, 81920.0], "load": 0.96, "temperature": 65, "power": 312.075}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.65192187246606, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943271636962891}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.4002385096057, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032142162322998}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.6939412684524, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32549.25, 81920.0], "load": 0.98, "temperature": 66, "power": 297.438}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978492259979248}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.6740370588822, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.63187595476836, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981564998626709}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.37120486170505, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016761779785156}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.418s, 306.40/s (0.442s, 289.88/s) LR: 1.008e-03 Data: 0.000 (0.027)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 276.9595767998813, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.617 (0.617) Loss: 6.8693 (6.8693) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.168) Loss: 6.7069 (6.8682) Acc@1: 0.0000 ( 0.2907) Acc@5: 3.1250 ( 1.3081)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D1/20240409-021521-davit_large-224/checkpoint-1.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32549.25, 81920.0], "load": 0.99, "temperature": 66, "power": 336.96}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.28764017864034, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32793.25, 81920.0], "load": 0.96, "temperature": 67, "power": 318.958}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32793.25, 81920.0], "load": 0.03, "temperature": 62, "power": 98.584}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885795593261719}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 1.368s, 93.59/s (1.368s, 93.59/s) LR: 2.006e-03 Data: 0.953 (0.953)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 236.01381835830017, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914775848388672}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.76525433759264, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33037.25, 81920.0], "load": 0.94, "temperature": 66, "power": 260.369}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931666851043701}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.40903090332137, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.940074920654297}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.06785561948413, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33037.25, 81920.0], "load": 0.98, "temperature": 67, "power": 313.917}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 270.9185759627733, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993440628051758}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.63230583212186, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912421226501465}, "pipe": "data"} +{"event": "data", "data": {"rate": 271.7884968247221, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013060569763184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33037.25, 81920.0], "load": 0.99, "temperature": 67, "power": 283.752}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.2272299723372, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895408630371094}, "pipe": "data"} +{"event": "data", "data": {"rate": 271.9655609330277, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973190784454346}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.13975430806533, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.7811890289321, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.093724727630615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33037.25, 81920.0], "load": 0.96, "temperature": 67, "power": 312.672}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.762443683644, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032838821411133}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.946 (6.95) Time: 0.417s, 306.80/s (0.453s, 282.63/s) LR: 2.006e-03 Data: 0.000 (0.037)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 276.26401315887796, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.616 (0.616) Loss: 6.7520 (6.7520) Acc@1: 0.0000 ( 0.0000) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.168) Loss: 6.4895 (6.8228) Acc@1: 0.0000 ( 0.2180) Acc@5: 6.2500 ( 1.0417)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 306.6652680174901, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33281.25, 81920.0], "load": 0.94, "temperature": 67, "power": 318.855}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33281.25, 81920.0], "load": 0.97, "temperature": 64, "power": 255.251}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859885215759277}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.860 (6.86) Time: 0.959s, 133.51/s (0.959s, 133.51/s) LR: 3.004e-03 Data: 0.543 (0.543)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.18778867102475, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33525.25, 81920.0], "load": 0.98, "temperature": 64, "power": 293.725}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858007431030273}, "pipe": "data"} +{"event": "data", "data": {"rate": 244.75348953415627, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86963415145874}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.3332694669362, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.1127273236523, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888379096984863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33525.25, 81920.0], "load": 0.98, "temperature": 66, "power": 227.165}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.99117679587476, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926602363586426}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.6890826421333, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993829727172852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33525.25, 81920.0], "load": 1.0, "temperature": 66, "power": 334.429}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.34277476188754, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019230842590332}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.0580121111767, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.102219581604004}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.7431817100973, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.07219102632, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.034765720367432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33525.25, 81920.0], "load": 0.99, "temperature": 67, "power": 297.974}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.70764729829193, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.040440082550049}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.282381349194, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0316362380981445}, "pipe": "data"} +{"event": "data", "data": {"rate": 304.24468396987527, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.000 (6.96) Time: 0.417s, 307.32/s (0.449s, 285.28/s) LR: 3.004e-03 Data: 0.000 (0.033)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33525.25, 81920.0], "load": 0.99, "temperature": 68, "power": 263.797}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.618 (0.618) Loss: 6.8269 (6.8269) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.168) Loss: 6.3618 (6.8160) Acc@1: 0.0000 ( 0.1211) Acc@5: 6.2500 ( 0.9690)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 307.7189246395801, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33769.25, 81920.0], "load": 0.97, "temperature": 68, "power": 295.906}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33769.25, 81920.0], "load": 0.3, "temperature": 65, "power": 101.358}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870382308959961}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.870 (6.87) Time: 0.922s, 138.89/s (0.922s, 138.89/s) LR: 4.002e-03 Data: 0.507 (0.507)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34013.25, 81920.0], "load": 0.99, "temperature": 63, "power": 267.464}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947832107543945}, "pipe": "data"} +{"event": "data", "data": {"rate": 266.0705843021448, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86939811706543}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.5191698506625, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.6696323423011, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970827102661133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34013.25, 81920.0], "load": 0.98, "temperature": 67, "power": 262.369}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.725015520902, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938931465148926}, "pipe": "data"} +{"event": "data", "data": {"rate": 271.09688077930434, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949921131134033}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.2114582378272, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.059994220733643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34013.25, 81920.0], "load": 0.95, "temperature": 68, "power": 337.46}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.00742769495594, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979753494262695}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.80132451617925, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 271.55109323611424, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975605010986328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34013.25, 81920.0], "load": 0.96, "temperature": 68, "power": 305.5}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 303.5661451878831, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.086536884307861}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.18611402547504, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "davit_large", "--batch-size", "128", "--lr-base", "0.01", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/davit_large.D1", "--checkpoint-hist", "1"], "time": 1712629026.399126, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/dlrm.0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/dlrm.0.data new file mode 100644 index 000000000..0c0b7658e --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/dlrm.0.data @@ -0,0 +1,280 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "dlrm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "tags": ["nlp", "rl"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "plan": {"method": "njobs", "n": 1}, "argv": {"--num-batches": 1000, "--data-generation": "random", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup": 100, "--arch-interaction-op": "dot", "--numpy-rand-seed": "727", "--print-freq": 999999, "--mini-batch-size": 16384, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "weight": 1.0, "name": "dlrm", "tag": ["dlrm", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 62, "power": 92.712, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 63, "power": 95.335, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629476.227517, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712629476.245485}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"} +{"event": "line", "data": "Using 2 GPU(s)...\n", "pipe": "stdout"} +{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1327.25, 81920.0], "load": 0, "temperature": 58, "power": 85.318}, "1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 59, "power": 91.338}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1327.25, 81920.0], "load": 0, "temperature": 57, "power": 84.733}, "1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 58, "power": 90.064}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1327.25, 81920.0], "load": 0, "temperature": 57, "power": 84.048}, "1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 58, "power": 89.444}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0887361615896225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3155.25, 81920.0], "load": 0.28, "temperature": 56, "power": 86.171}, "1": {"memory": [3137.25, 81920.0], "load": 0.05, "temperature": 57, "power": 91.224}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4251.25, 81920.0], "load": 0, "temperature": 56, "power": 83.372}, "1": {"memory": [4233.25, 81920.0], "load": 0, "temperature": 57, "power": 88.607}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08788755536079407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4259.25, 81920.0], "load": 0, "temperature": 55, "power": 82.871}, "1": {"memory": [4241.25, 81920.0], "load": 0, "temperature": 56, "power": 87.952}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08937834203243256}, "pipe": "data"} +{"event": "data", "data": {"rate": 519872.2962134868, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4259.25, 81920.0], "load": 0, "temperature": 55, "power": 82.577}, "1": {"memory": [4645.25, 81920.0], "load": 0, "temperature": 56, "power": 87.897}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08813147246837616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4461.25, 81920.0], "load": 0.02, "temperature": 54, "power": 81.587}, "1": {"memory": [4645.25, 81920.0], "load": 0.01, "temperature": 55, "power": 87.408}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 37612.08347501473, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4461.25, 81920.0], "load": 0, "temperature": 54, "power": 80.696}, "1": {"memory": [4645.25, 81920.0], "load": 0, "temperature": 55, "power": 86.906}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08771650493144989}, "pipe": "data"} +{"event": "data", "data": {"rate": 526337.7867194093, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4461.25, 81920.0], "load": 0, "temperature": 53, "power": 80.585}, "1": {"memory": [4645.25, 81920.0], "load": 0, "temperature": 55, "power": 86.319}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08742949366569519}, "pipe": "data"} +{"event": "data", "data": {"rate": 31693.85629026732, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 53, "power": 80.291}, "1": {"memory": [4645.25, 81920.0], "load": 0, "temperature": 54, "power": 85.93}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08757737278938293}, "pipe": "data"} +{"event": "data", "data": {"rate": 508223.51809293096, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 52, "power": 79.999}, "1": {"memory": [4645.25, 81920.0], "load": 0, "temperature": 54, "power": 90.142}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08654382824897766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 52, "power": 79.608}, "1": {"memory": [4847.25, 81920.0], "load": 0.09, "temperature": 54, "power": 85.414}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 521840.2103872192, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0858154371380806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 52, "power": 79.315}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 53, "power": 84.24}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 512149.45161184855, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 51, "power": 79.315}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 53, "power": 88.412}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08628800511360168}, "pipe": "data"} +{"event": "data", "data": {"rate": 513076.20941651415, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 51, "power": 79.009}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 52, "power": 83.737}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0873076319694519}, "pipe": "data"} +{"event": "data", "data": {"rate": 523913.900327299, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 51, "power": 78.911}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 52, "power": 87.778}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08590050786733627}, "pipe": "data"} +{"event": "data", "data": {"rate": 514061.9990648741, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 50, "power": 79.027}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 52, "power": 83.236}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08602668344974518}, "pipe": "data"} +{"event": "data", "data": {"rate": 524264.6136041163, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 50, "power": 78.508}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 52, "power": 83.04}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08581672608852386}, "pipe": "data"} +{"event": "data", "data": {"rate": 515904.4779431785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0.06, "temperature": 50, "power": 81.196}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 51, "power": 82.858}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 49, "power": 78.116}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 51, "power": 86.907}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08588778972625732}, "pipe": "data"} +{"event": "data", "data": {"rate": 500367.4267890441, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 49, "power": 78.116}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 51, "power": 82.441}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08632193505764008}, "pipe": "data"} +{"event": "data", "data": {"rate": 533073.5293983674, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 49, "power": 78.116}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 51, "power": 82.168}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08664406836032867}, "pipe": "data"} +{"event": "data", "data": {"rate": 532724.3073673587, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 49, "power": 77.725}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 50, "power": 82.05}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0852133184671402}, "pipe": "data"} +{"event": "data", "data": {"rate": 536680.8100054284, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4663.25, 81920.0], "load": 0, "temperature": 48, "power": 77.628}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 50, "power": 81.854}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08576951175928116}, "pipe": "data"} +{"event": "data", "data": {"rate": 516792.4587161397, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 48, "power": 77.517}, "1": {"memory": [4847.25, 81920.0], "load": 0.05, "temperature": 50, "power": 81.952}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08471724390983582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 48, "power": 77.321}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 49, "power": 81.462}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 539504.3001298248, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 47, "power": 77.224}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 49, "power": 85.623}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08532554656267166}, "pipe": "data"} +{"event": "data", "data": {"rate": 545750.4166493316, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 47, "power": 77.419}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 49, "power": 81.365}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08427020162343979}, "pipe": "data"} +{"event": "data", "data": {"rate": 532268.8303256008, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 47, "power": 76.931}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 49, "power": 85.219}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08591149747371674}, "pipe": "data"} +{"event": "data", "data": {"rate": 523296.1319523903, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 47, "power": 76.655}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 48, "power": 85.024}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0845126137137413}, "pipe": "data"} +{"event": "data", "data": {"rate": 535816.7708098377, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 46, "power": 76.35}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 48, "power": 80.557}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0840827077627182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 46, "power": 96.916}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 48, "power": 84.926}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 521478.7065991278, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 46, "power": 78.135}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 48, "power": 80.165}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08428709208965302}, "pipe": "data"} +{"event": "data", "data": {"rate": 543355.0683098867, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 46, "power": 76.044}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 48, "power": 80.186}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08434845507144928}, "pipe": "data"} +{"event": "data", "data": {"rate": 523956.42360853497, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4865.25, 81920.0], "load": 0, "temperature": 46, "power": 76.044}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 47, "power": 84.339}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08465416729450226}, "pipe": "data"} +{"event": "data", "data": {"rate": 269136.28999960906, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 46, "power": 75.946}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 47, "power": 79.879}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08376754820346832}, "pipe": "data"} +{"event": "data", "data": {"rate": 539625.166030912, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 45, "power": 75.768}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 47, "power": 79.977}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.083040751516819}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0.05, "temperature": 45, "power": 76.172}, "1": {"memory": [4847.25, 81920.0], "load": 0.09, "temperature": 47, "power": 84.045}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 534384.0625171949, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 45, "power": 75.768}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 47, "power": 79.664}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08479100465774536}, "pipe": "data"} +{"event": "data", "data": {"rate": 542934.9577904854, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 45, "power": 75.768}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 47, "power": 79.567}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08379324525594711}, "pipe": "data"} +{"event": "data", "data": {"rate": 518059.0076924996, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 45, "power": 75.572}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 46, "power": 79.37}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08349813520908356}, "pipe": "data"} +{"event": "data", "data": {"rate": 532137.5482022299, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 44, "power": 75.181}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 46, "power": 83.53}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08437865972518921}, "pipe": "data"} +{"event": "data", "data": {"rate": 526075.2248398439, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 44, "power": 74.875}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 46, "power": 79.273}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08468881249427795}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5069.25, 81920.0], "load": 0, "temperature": 44, "power": 74.968}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 46, "power": 79.076}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 539210.0559516001, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08403709530830383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 44, "power": 75.342}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 45, "power": 78.784}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 82740.48025172498, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 43, "power": 74.467}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 45, "power": 82.846}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08420669287443161}, "pipe": "data"} +{"event": "data", "data": {"rate": 531853.3198924373, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 43, "power": 74.564}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 45, "power": 82.65}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08320620656013489}, "pipe": "data"} +{"event": "data", "data": {"rate": 533904.7782913483, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 43, "power": 74.387}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 45, "power": 78.399}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08308559656143188}, "pipe": "data"} +{"event": "data", "data": {"rate": 519450.280580511, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 43, "power": 74.386}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 44, "power": 78.27}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08385886251926422}, "pipe": "data"} +{"event": "data", "data": {"rate": 536725.5541476774, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 42, "power": 74.484}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 44, "power": 78.289}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402976393699646}, "pipe": "data"} +{"event": "data", "data": {"rate": 526507.959737309, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0.1, "temperature": 42, "power": 74.353}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 44, "power": 77.962}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 42, "power": 74.289}, "1": {"memory": [4847.25, 81920.0], "load": 0, "temperature": 44, "power": 77.8}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08415201306343079}, "pipe": "data"} +{"event": "data", "data": {"rate": 500074.10766195087, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 42, "power": 73.592}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 44, "power": 77.506}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402085304260254}, "pipe": "data"} +{"event": "data", "data": {"rate": 532370.0243312987, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 42, "power": 76.561}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 43, "power": 81.757}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08228246122598648}, "pipe": "data"} +{"event": "data", "data": {"rate": 518937.89148984617, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 41, "power": 73.495}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 43, "power": 76.907}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08508101850748062}, "pipe": "data"} +{"event": "data", "data": {"rate": 538300.8584656763, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 41, "power": 73.494}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 43, "power": 80.754}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08269783854484558}, "pipe": "data"} +{"event": "data", "data": {"rate": 521985.234992598, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 41, "power": 73.201}, "1": {"memory": [5051.25, 81920.0], "load": 0.09, "temperature": 43, "power": 76.515}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08433524519205093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 41, "power": 73.299}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 43, "power": 76.532}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 527565.1163280437, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 41, "power": 73.495}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 76.319}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08258543908596039}, "pipe": "data"} +{"event": "data", "data": {"rate": 530381.8069199158, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 41, "power": 73.201}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 80.299}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08313082903623581}, "pipe": "data"} +{"event": "data", "data": {"rate": 535141.5263814582, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 41, "power": 73.103}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 76.123}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08317264169454575}, "pipe": "data"} +{"event": "data", "data": {"rate": 538352.1528220887, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 73.086}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 80.264}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08357995748519897}, "pipe": "data"} +{"event": "data", "data": {"rate": 551095.044083399, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 72.993}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 80.166}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08405735343694687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 99.18}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 80.069}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 522131.28867604496, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 73.593}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 75.822}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08272015303373337}, "pipe": "data"} +{"event": "data", "data": {"rate": 530182.5783894552, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 73.005}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 42, "power": 75.835}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08338482677936554}, "pipe": "data"} +{"event": "data", "data": {"rate": 528944.4546014149, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 73.005}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 41, "power": 75.708}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08422384411096573}, "pipe": "data"} +{"event": "data", "data": {"rate": 544103.0310098876, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 72.895}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 41, "power": 79.796}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08405454456806183}, "pipe": "data"} +{"event": "data", "data": {"rate": 535052.2059620505, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 40, "power": 72.89}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 41, "power": 75.708}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08417406678199768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0.09, "temperature": 39, "power": 72.89}, "1": {"memory": [5051.25, 81920.0], "load": 0.09, "temperature": 41, "power": 102.448}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 517127.90494643693, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 39, "power": 73.005}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 41, "power": 75.512}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08274278044700623}, "pipe": "data"} +{"event": "data", "data": {"rate": 534601.899089715, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 39, "power": 76.755}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 41, "power": 75.512}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08461865782737732}, "pipe": "data"} +{"event": "data", "data": {"rate": 538075.8625224053, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 39, "power": 72.683}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 40, "power": 77.213}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08379694819450378}, "pipe": "data"} +{"event": "data", "data": {"rate": 522708.78385886503, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 39, "power": 72.406}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 40, "power": 75.121}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08401922881603241}, "pipe": "data"} +{"event": "data", "data": {"rate": 543789.3294288303, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 38, "power": 72.193}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 40, "power": 79.098}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08402208983898163}, "pipe": "data"} +{"event": "data", "data": {"rate": 541495.1207766463, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 38, "power": 72.406}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 40, "power": 74.827}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08343112468719482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 38, "power": 72.112}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 40, "power": 74.745}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 553784.3252739304, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 38, "power": 72.014}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 39, "power": 74.631}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5273.25, 81920.0], "load": 0, "temperature": 38, "power": 72.002}, "1": {"memory": [5051.25, 81920.0], "load": 0, "temperature": 39, "power": 74.631}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--num-batches", "1000", "--data-generation", "random", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--num-indices-per-lookup", "100", "--arch-interaction-op", "dot", "--numpy-rand-seed", "727", "--print-freq", "999999", "--mini-batch-size", "16384", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1712629723.7047665, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/focalnet.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/focalnet.D0.data new file mode 100644 index 000000000..011917d19 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/focalnet.D0.data @@ -0,0 +1,255 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 1.0, "memory": 0.010771942138671876}, "temperature": 60, "power": 89.733, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.37, "memory": 0.010771942138671876}, "temperature": 62, "power": 96.631, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629165.35442, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712629168.4007306}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004456520080566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4341.25, 81920.0], "load": 0.98, "temperature": 60, "power": 178.841}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 14.339s, 8.93/s (14.339s, 8.93/s) LR: 1.000e-05 Data: 0.820 (0.820)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [22343.25, 81920.0], "load": 0.79, "temperature": 58, "power": 197.024}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [21753.25, 81920.0], "load": 0.99, "temperature": 58, "power": 167.073}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [11601.25, 81920.0], "load": 0.99, "temperature": 59, "power": 176.63}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10985.25, 81920.0], "load": 1.0, "temperature": 56, "power": 88.158}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006711483001709}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0363240242004395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.024362564086914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23831.25, 81920.0], "load": 0.99, "temperature": 60, "power": 277.579}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970383167266846}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.941365718841553}, "pipe": "data"} +{"event": "data", "data": {"rate": 372.38641503752433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23831.25, 81920.0], "load": 0.99, "temperature": 60, "power": 302.749}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.997442245483398}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.0939046927532, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032032489776611}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.21678584273485, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 374.6513554488949, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.980218887329102}, "pipe": "data"} +{"event": "data", "data": {"rate": 397.58550052892843, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23831.25, 81920.0], "load": 0.95, "temperature": 62, "power": 84.577}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979130744934082}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.323s, 396.74/s (0.799s, 160.22/s) LR: 1.000e-05 Data: 0.000 (0.033)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 230.44281295627076, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.738 (0.738) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 1.012 (0.163) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D0/20240409-021933-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 396.62149997832677, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24077.25, 81920.0], "load": 0.94, "temperature": 62, "power": 310.209}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5717.25, 81920.0], "load": 0.9, "temperature": 59, "power": 89.929}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0206828117370605}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.373s, 93.24/s (1.373s, 93.24/s) LR: 1.001e-02 Data: 0.549 (0.549)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 216.88988113298382, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.986520767211914}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.43144921781743, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23923.25, 81920.0], "load": 0.99, "temperature": 61, "power": 188.269}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963555335998535}, "pipe": "data"} +{"event": "data", "data": {"rate": 290.2297889592503, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 363.3840543187619, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061347007751465}, "pipe": "data"} +{"event": "data", "data": {"rate": 397.02272226331144, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.999067306518555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23927.25, 81920.0], "load": 0.95, "temperature": 63, "power": 294.255}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.17152962425524, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087905406951904}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.7978693038124, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007884502410889}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.48671730303687, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.0484803011852, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.197999954223633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23927.25, 81920.0], "load": 0.96, "temperature": 63, "power": 296.347}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 361.9375176989449, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.108449935913086}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.325s, 394.41/s (0.371s, 344.68/s) LR: 1.001e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.591 (0.591) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.129) Loss: 6.9395 (6.9699) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D0/20240409-021933-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 396.6361695491586, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24171.25, 81920.0], "load": 0.93, "temperature": 63, "power": 306.779}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24171.25, 81920.0], "load": 0.03, "temperature": 59, "power": 86.493}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99587345123291}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 0.868s, 147.52/s (0.868s, 147.52/s) LR: 2.001e-02 Data: 0.547 (0.547)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 321.0429433160861, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097563743591309}, "pipe": "data"} +{"event": "data", "data": {"rate": 383.1769638535609, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.009759902954102}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.7924987757527, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24415.25, 81920.0], "load": 0.95, "temperature": 63, "power": 262.87}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.160521984100342}, "pipe": "data"} +{"event": "data", "data": {"rate": 376.23800454038144, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 387.3565264210218, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141149997711182}, "pipe": "data"} +{"event": "data", "data": {"rate": 390.7884546334167, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.252389430999756}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.3233407119136, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24415.25, 81920.0], "load": 0.96, "temperature": 65, "power": 309.98}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.190238952636719}, "pipe": "data"} +{"event": "data", "data": {"rate": 359.66069037261025, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.219792366027832}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.0529113918928, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.122175216674805}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.4690688616242, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 7.261 (7.13) Time: 0.322s, 397.69/s (0.346s, 369.65/s) LR: 2.001e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.590 (0.590) Loss: 6.9301 (6.9301) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.129) Loss: 6.7456 (7.1150) Acc@1: 0.0000 ( 0.1696) Acc@5: 0.0000 ( 0.8479)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24415.25, 81920.0], "load": 0.98, "temperature": 65, "power": 321.994}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 397.13357486800857, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24659.25, 81920.0], "load": 0.92, "temperature": 65, "power": 314.722}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089280128479004}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 7.089 (7.09) Time: 0.864s, 148.16/s (0.864s, 148.16/s) LR: 3.000e-02 Data: 0.542 (0.542)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 363.43267098154945, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2314772605896}, "pipe": "data"} +{"event": "data", "data": {"rate": 355.90963555194395, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24903.25, 81920.0], "load": 0.99, "temperature": 64, "power": 281.582}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.042531967163086}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.05243430693156, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.210859298706055}, "pipe": "data"} +{"event": "data", "data": {"rate": 385.86702027868756, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.23007345199585}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.2801557223229, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24903.25, 81920.0], "load": 0.95, "temperature": 65, "power": 299.935}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.8270865562863, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.154796600341797}, "pipe": "data"} +{"event": "data", "data": {"rate": 376.06112734952524, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.280021667480469}, "pipe": "data"} +{"event": "data", "data": {"rate": 369.5303855333127, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.347418308258057}, "pipe": "data"} +{"event": "data", "data": {"rate": 379.2474248667766, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24903.25, 81920.0], "load": 1.0, "temperature": 65, "power": 303.007}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.239522933959961}, "pipe": "data"} +{"event": "data", "data": {"rate": 385.99627556264767, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.240 (7.23) Time: 0.323s, 396.06/s (0.347s, 369.20/s) LR: 3.000e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.630 (0.630) Loss: 7.1711 (7.1711) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.129) Loss: 6.2437 (7.1747) Acc@1: 0.0000 ( 0.2180) Acc@5: 25.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 395.8772269296337, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25147.25, 81920.0], "load": 0.93, "temperature": 66, "power": 311.129}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25147.25, 81920.0], "load": 0.03, "temperature": 61, "power": 88.939}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.21547794342041}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 7.215 (7.22) Time: 0.874s, 146.47/s (0.874s, 146.47/s) LR: 4.000e-02 Data: 0.551 (0.551)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 395.8767518764055, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.120698928833008}, "pipe": "data"} +{"event": "data", "data": {"rate": 314.3088574502491, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.5688491858323, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.282310962677002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25391.25, 81920.0], "load": 0.98, "temperature": 65, "power": 218.589}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.228292465209961}, "pipe": "data"} +{"event": "data", "data": {"rate": 384.9075951734849, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1875810623168945}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.4643543908312, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.50641413046856, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.40812349319458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25391.25, 81920.0], "load": 0.96, "temperature": 66, "power": 304.248}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 378.66859646176067, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.3219895362854}, "pipe": "data"} +{"event": "data", "data": {"rate": 372.4570975423053, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.464129447937012}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.1774465405524, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.79788600966253, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.423477649688721}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25391.25, 81920.0], "load": 0.99, "temperature": 65, "power": 296.199}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.40427303314209}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.7480189590885, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.404 (7.33) Time: 0.324s, 395.09/s (0.361s, 354.56/s) LR: 4.000e-02 Data: 0.000 (0.038)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.966 (0.966) Loss: 6.9689 (6.9689) Acc@1: 0.0000 ( 0.0000) Acc@5: 4.6875 ( 4.6875)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.138) Loss: 6.8198 (7.2575) Acc@1: 0.0000 ( 0.1211) Acc@5: 3.1250 ( 0.7025)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 394.81988415589774, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25635.25, 81920.0], "load": 0.94, "temperature": 66, "power": 314.644}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25635.25, 81920.0], "load": 0.03, "temperature": 62, "power": 92.749}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.211021423339844}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 7.211 (7.21) Time: 0.864s, 148.18/s (0.864s, 148.18/s) LR: 4.997e-02 Data: 0.541 (0.541)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 381.5340227569664, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.400643348693848}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.9496928055171, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.457422256469727}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.9269083549031, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 377.9113225412987, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25879.25, 81920.0], "load": 0.95, "temperature": 66, "power": 297.485}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.684177875518799}, "pipe": "data"} +{"event": "data", "data": {"rate": 392.1277911467258, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.538991451263428}, "pipe": "data"} +{"event": "data", "data": {"rate": 374.44642265052937, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.379839897155762}, "pipe": "data"} +{"event": "data", "data": {"rate": 362.02177190262785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25879.25, 81920.0], "load": 0.96, "temperature": 67, "power": 313.091}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.4352642546199, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.36268424987793}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.18096570052967, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D0", "--checkpoint-hist", "1"], "time": 1712629283.667647, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/focalnet.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/focalnet.D1.data new file mode 100644 index 000000000..50caa90be --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/focalnet.D1.data @@ -0,0 +1,254 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "tags": ["classification", "convnet", "vision"], "weight": 2.0, "name": "focalnet", "tag": ["focalnet", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 59, "power": 85.623, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 60, "power": 92.525, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629168.381467, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712629168.4084082}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004467010498047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4371.25, 81920.0], "load": 1.0, "temperature": 62, "power": 177.76}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 14.354s, 8.92/s (14.354s, 8.92/s) LR: 1.000e-05 Data: 0.844 (0.844)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [22425.25, 81920.0], "load": 0.9, "temperature": 60, "power": 237.58}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [21883.25, 81920.0], "load": 0.99, "temperature": 59, "power": 151.557}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [14195.25, 81920.0], "load": 0.99, "temperature": 60, "power": 267.122}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [13705.25, 81920.0], "load": 1.0, "temperature": 60, "power": 188.729}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006728649139404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036325454711914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23903.25, 81920.0], "load": 0.96, "temperature": 63, "power": 326.713}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.024367332458496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970383644104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003751754760742}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.2245226316359, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23907.25, 81920.0], "load": 0.95, "temperature": 63, "power": 320.123}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.990042686462402}, "pipe": "data"} +{"event": "data", "data": {"rate": 385.8512176796223, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.977944850921631}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.54892291425205, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.2709085682313, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979873180389404}, "pipe": "data"} +{"event": "data", "data": {"rate": 375.45699239149604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23907.25, 81920.0], "load": 0.96, "temperature": 64, "power": 295.927}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005303382873535}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.323s, 395.98/s (0.791s, 161.85/s) LR: 1.000e-05 Data: 0.000 (0.034)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 1.001 (1.001) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 1.062 (0.170) Loss: 6.8639 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D1/20240409-021933-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 396.0135334778719, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24151.25, 81920.0], "load": 0.93, "temperature": 64, "power": 312.627}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [7725.25, 81920.0], "load": 0.05, "temperature": 60, "power": 96.827}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020679950714111}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 1.357s, 94.35/s (1.357s, 94.35/s) LR: 1.001e-02 Data: 0.543 (0.543)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.0176873207092285}, "pipe": "data"} +{"event": "data", "data": {"rate": 215.42105742956923, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 390.8688409403499, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.062005996704102}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23993.25, 81920.0], "load": 0.96, "temperature": 64, "power": 285.19}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.0872498024073, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047094821929932}, "pipe": "data"} +{"event": "data", "data": {"rate": 358.31080807928356, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.037492275238037}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.6899671267035, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23993.25, 81920.0], "load": 0.99, "temperature": 65, "power": 334.176}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077417373657227}, "pipe": "data"} +{"event": "data", "data": {"rate": 385.51916641816996, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993808746337891}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.6396618197243, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.83532770586737, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0708794593811035}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.8211342609497, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14704704284668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [23993.25, 81920.0], "load": 0.95, "temperature": 66, "power": 314.21}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.12341658665116, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.322s, 397.60/s (0.362s, 353.40/s) LR: 1.001e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.844 (0.844) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.134) Loss: 6.9393 (6.9700) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D1/20240409-021933-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 397.5316786006815, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24237.25, 81920.0], "load": 0.94, "temperature": 66, "power": 309.649}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24237.25, 81920.0], "load": 0, "temperature": 61, "power": 93.454}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995872497558594}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 0.875s, 146.33/s (0.875s, 146.33/s) LR: 2.001e-02 Data: 0.551 (0.551)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 394.78235225608574, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097558975219727}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.5677733836491, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0096845626831055}, "pipe": "data"} +{"event": "data", "data": {"rate": 382.22445122781363, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.42054081351375, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24481.25, 81920.0], "load": 0.95, "temperature": 65, "power": 287.865}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.010942459106445}, "pipe": "data"} +{"event": "data", "data": {"rate": 392.9205579402343, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231774806976318}, "pipe": "data"} +{"event": "data", "data": {"rate": 378.9520951005073, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112374305725098}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.73433384338546, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24481.25, 81920.0], "load": 0.96, "temperature": 67, "power": 293.934}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.4010329631985, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.235927104949951}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.58512293280194, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.224554061889648}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.1869546309103, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 7.261 (7.13) Time: 0.322s, 397.15/s (0.348s, 368.18/s) LR: 2.001e-02 Data: 0.001 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.688 (0.688) Loss: 6.9301 (6.9301) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.130) Loss: 6.7457 (7.1150) Acc@1: 0.0000 ( 0.1696) Acc@5: 0.0000 ( 0.8479)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 394.8603289093377, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24481.25, 81920.0], "load": 0, "temperature": 63, "power": 99.559}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24725.25, 81920.0], "load": 0.98, "temperature": 67, "power": 311.417}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089291095733643}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 7.089 (7.09) Time: 0.982s, 130.31/s (0.982s, 130.31/s) LR: 3.000e-02 Data: 0.660 (0.660)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 367.4886759118157, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231501579284668}, "pipe": "data"} +{"event": "data", "data": {"rate": 382.66554498626016, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24969.25, 81920.0], "load": 0.95, "temperature": 67, "power": 312.572}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0426130294799805}, "pipe": "data"} +{"event": "data", "data": {"rate": 390.86900804185194, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.5879204109807, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.210793495178223}, "pipe": "data"} +{"event": "data", "data": {"rate": 355.2436420988431, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.3732404708862305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24969.25, 81920.0], "load": 0.99, "temperature": 67, "power": 286.861}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.09661731649345, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.324423789978027}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.3403237450559, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.433404445648193}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.8951204750138, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 375.1606915092, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.272957801818848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [24969.25, 81920.0], "load": 0.95, "temperature": 68, "power": 299.681}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.7838090399549, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.239 (7.23) Time: 0.323s, 396.51/s (0.351s, 364.75/s) LR: 3.000e-02 Data: 0.000 (0.028)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.642 (0.642) Loss: 7.1712 (7.1712) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.128) Loss: 6.2444 (7.1748) Acc@1: 0.0000 ( 0.2180) Acc@5: 25.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 396.08182184629237, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25213.25, 81920.0], "load": 0.96, "temperature": 68, "power": 232.779}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25213.25, 81920.0], "load": 0.03, "temperature": 63, "power": 98.708}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.215520858764648}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 7.216 (7.22) Time: 0.886s, 144.41/s (0.886s, 144.41/s) LR: 4.000e-02 Data: 0.564 (0.564)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 395.4238365529411, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1208038330078125}, "pipe": "data"} +{"event": "data", "data": {"rate": 315.0654139797084, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.56868445959026, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.281979560852051}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25457.25, 81920.0], "load": 1.0, "temperature": 67, "power": 302.156}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.228720188140869}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.3450404260715, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.187459945678711}, "pipe": "data"} +{"event": "data", "data": {"rate": 368.22802448304793, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.25672309280185, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.408051490783691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25457.25, 81920.0], "load": 0.94, "temperature": 68, "power": 300.789}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 393.32570728732617, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.321699142456055}, "pipe": "data"} +{"event": "data", "data": {"rate": 383.8384370281167, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.464118003845215}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.4819591905128, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 361.60665929399653, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.423360824584961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25457.25, 81920.0], "load": 0.96, "temperature": 68, "power": 328.473}}}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.404 (7.33) Time: 0.325s, 394.03/s (0.362s, 353.76/s) LR: 4.000e-02 Data: 0.000 (0.038)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 390.4322669623573, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.943 (0.943) Loss: 6.9695 (6.9695) Acc@1: 0.0000 ( 0.0000) Acc@5: 4.6875 ( 4.6875)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.032 (0.137) Loss: 6.8203 (7.2576) Acc@1: 0.0000 ( 0.1211) Acc@5: 3.1250 ( 0.7025)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 393.7831400622609, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25701.25, 81920.0], "load": 0.96, "temperature": 68, "power": 323.131}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25701.25, 81920.0], "load": 0, "temperature": 63, "power": 96.09}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.211348533630371}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 7.211 (7.21) Time: 0.870s, 147.05/s (0.870s, 147.05/s) LR: 4.997e-02 Data: 0.547 (0.547)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 359.33501630321217, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.400300025939941}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.42470793616997, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.73497159792834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.457738399505615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.274268627166748}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.9973790862873, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25945.25, 81920.0], "load": 0.99, "temperature": 68, "power": 297.143}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 373.78560451824563, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.490908145904541}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.4409168247272, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.528045654296875}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.60993969647444, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25945.25, 81920.0], "load": 0.96, "temperature": 69, "power": 289.414}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.465839862823486}, "pipe": "data"} +{"event": "data", "data": {"rate": 357.84352654951977, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.50070060399986, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.483597755432129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5227274894714355}, "pipe": "data"} +{"event": "data", "data": {"rate": 383.0874875201644, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/focalnet.D1", "--checkpoint-hist", "1"], "time": 1712629284.2200496, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp16.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp16.D0.data new file mode 100644 index 000000000..a224040b6 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp16.D0.data @@ -0,0 +1,125 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.1, "memory": 0.010771942138671876}, "temperature": 68, "power": 97.008, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 70, "power": 108.431, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627444.851473, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712627447.9133613}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 219.54032487836332, "units": "Tflops", "t": 1712627449.9178057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 64, "power": 92.517}}, "t": 1712627449.2888935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 0.5, "temperature": 68, "power": 371.835}}, "t": 1712627449.8096967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.51999781597857, "units": "Tflops", "t": 1712627450.1777043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.20199074827715, "units": "Tflops", "t": 1712627450.4383228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.61192919763073, "units": "Tflops", "t": 1712627450.7005813}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.66776820253693, "units": "Tflops", "t": 1712627450.9627714}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.31598479725713, "units": "Tflops", "t": 1712627451.2253366}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 277.474}}, "t": 1712627450.9954078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.0387272159888, "units": "Tflops", "t": 1712627451.4884827}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.55512342075514, "units": "Tflops", "t": 1712627451.7497637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 263.307}}, "t": 1712627451.5243764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.17882301679856, "units": "Tflops", "t": 1712627452.0104687}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.96148465520616, "units": "Tflops", "t": 1712627452.2713234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.60602154188817, "units": "Tflops", "t": 1712627452.5316327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.5874502356686, "units": "Tflops", "t": 1712627452.7939298}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.27964006342737, "units": "Tflops", "t": 1712627453.0575821}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 252.313}}, "t": 1712627452.8916006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.1932410561177, "units": "Tflops", "t": 1712627453.32045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.46078001111206, "units": "Tflops", "t": 1712627453.579801}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 274.75}}, "t": 1712627453.421219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.99525291249003, "units": "Tflops", "t": 1712627453.8412273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.6129948898555, "units": "Tflops", "t": 1712627454.1015406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.71630418486222, "units": "Tflops", "t": 1712627454.3636935}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.68740113672317, "units": "Tflops", "t": 1712627454.623805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.13647272784007, "units": "Tflops", "t": 1712627454.8865736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 262.522}}, "t": 1712627454.729826}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.61783938762412, "units": "Tflops", "t": 1712627455.147865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.38927752972984, "units": "Tflops", "t": 1712627455.4103427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 269.77}}, "t": 1712627455.259597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.36918056502256, "units": "Tflops", "t": 1712627455.6728911}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.37945702051024, "units": "Tflops", "t": 1712627455.9353774}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 270.333}}, "t": 1712627455.7811892}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3927034692804, "units": "Tflops", "t": 1712627456.197901}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3792286457042, "units": "Tflops", "t": 1712627456.4603956}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.2607580884315, "units": "Tflops", "t": 1712627456.7230146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 69, "power": 269.159}}, "t": 1712627456.5795505}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.10935154312563, "units": "Tflops", "t": 1712627456.9862304}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.70439739986287, "units": "Tflops", "t": 1712627457.2483907}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 258.504}}, "t": 1712627457.1088886}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.32879330640137, "units": "Tflops", "t": 1712627457.508924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.25034696770132, "units": "Tflops", "t": 1712627457.768585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 280.978}}, "t": 1712627457.644229}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.27661965978814, "units": "Tflops", "t": 1712627458.0292113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.45552891375962, "units": "Tflops", "t": 1712627458.2917583}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.5894860678837, "units": "Tflops", "t": 1712627458.5550938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 292.672}}, "t": 1712627458.4635005}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.11223014354346, "units": "Tflops", "t": 1712627458.8189971}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.52724935142675, "units": "Tflops", "t": 1712627459.0793834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 301.819}}, "t": 1712627458.991628}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.27535653519286, "units": "Tflops", "t": 1712627459.3410304}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.59201139410968, "units": "Tflops", "t": 1712627459.6022658}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.8286840076248, "units": "Tflops", "t": 1712627459.8654838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.29841403602668, "units": "Tflops", "t": 1712627460.1259863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.797523639391, "units": "Tflops", "t": 1712627460.391196}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 272.301}}, "t": 1712627460.31225}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.24844873383412, "units": "Tflops", "t": 1712627460.6529195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.26526024854422, "units": "Tflops", "t": 1712627460.913575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 301.036}}, "t": 1712627460.8393538}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.94252287455885, "units": "Tflops", "t": 1712627461.1745064}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.76089782413126, "units": "Tflops", "t": 1712627461.4335115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 270.042}}, "t": 1712627461.372041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.10378867947816, "units": "Tflops", "t": 1712627461.6942673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.6373799632635, "units": "Tflops", "t": 1712627461.9576735}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.05558231643693, "units": "Tflops", "t": 1712627462.220513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 281.669}}, "t": 1712627462.201507}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.59270314652622, "units": "Tflops", "t": 1712627462.4818304}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.89224361591988, "units": "Tflops", "t": 1712627462.741724}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 254.024}}, "t": 1712627462.7301133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.46686725211126, "units": "Tflops", "t": 1712627463.0021207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.76004839162968, "units": "Tflops", "t": 1712627463.264333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.78574765303784, "units": "Tflops", "t": 1712627463.5285122}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3232893199797, "units": "Tflops", "t": 1712627463.7910678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.51215112966196, "units": "Tflops", "t": 1712627464.0513499}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 260.963}}, "t": 1712627464.0443187}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.14012559598527, "units": "Tflops", "t": 1712627464.31107}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.21694860305328, "units": "Tflops", "t": 1712627464.5728626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.54382863384427, "units": "Tflops", "t": 1712627464.8341522}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 294.037}}, "t": 1712627464.5781648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.82465364048912, "units": "Tflops", "t": 1712627465.0952659}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.6818854837436, "units": "Tflops", "t": 1712627465.3584838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.71601265516156, "units": "Tflops", "t": 1712627465.6185617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.89859796388077, "units": "Tflops", "t": 1712627465.8794749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.50101127672656, "units": "Tflops", "t": 1712627466.1419573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 290.039}}, "t": 1712627465.9138553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.54728198144608, "units": "Tflops", "t": 1712627466.4053843}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.83658004778528, "units": "Tflops", "t": 1712627466.6674082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 261.235}}, "t": 1712627466.4445555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.74844617779527, "units": "Tflops", "t": 1712627466.9306226}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.86577983282166, "units": "Tflops", "t": 1712627467.1915762}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 70, "power": 242.731}}, "t": 1712627466.9641373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.6192231791939, "units": "Tflops", "t": 1712627467.4528415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.30111980892238, "units": "Tflops", "t": 1712627467.7143874}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.7345924392241, "units": "Tflops", "t": 1712627467.9756427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 255.873}}, "t": 1712627467.82108}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.79867404978182, "units": "Tflops", "t": 1712627468.2388709}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.73691474428227, "units": "Tflops", "t": 1712627468.5009942}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 260.261}}, "t": 1712627468.3396509}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.66984926864257, "units": "Tflops", "t": 1712627468.7642899}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.67418071718834, "units": "Tflops", "t": 1712627469.028588}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 264.663}}, "t": 1712627468.8621948}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.01274290901046, "units": "Tflops", "t": 1712627469.2884254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.11096596659104, "units": "Tflops", "t": 1712627469.5491288}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.24661300251907, "units": "Tflops", "t": 1712627469.8119147}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 272.099}}, "t": 1712627469.684232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3696372785361, "units": "Tflops", "t": 1712627470.0744796}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.31621305716513, "units": "Tflops", "t": 1712627470.337049}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 271.894}}, "t": 1712627470.2143764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.34977177467752, "units": "Tflops", "t": 1712627470.5996342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.34497712347306, "units": "Tflops", "t": 1712627470.8621604}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3436072567266, "units": "Tflops", "t": 1712627471.1246967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.37877189733695, "units": "Tflops", "t": 1712627471.387191}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.39064789434485, "units": "Tflops", "t": 1712627471.6496677}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 272.387}}, "t": 1712627471.5243933}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.33904114207843, "units": "Tflops", "t": 1712627471.9122741}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3589049497087, "units": "Tflops", "t": 1712627472.1749647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 272.289}}, "t": 1712627472.054503}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3863084576495, "units": "Tflops", "t": 1712627472.4375253}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.39453067522206, "units": "Tflops", "t": 1712627472.7000053}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.39064789434485, "units": "Tflops", "t": 1712627472.9624805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.40229659680972, "units": "Tflops", "t": 1712627473.2249424}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712627474.2469668, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp16.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp16.D1.data new file mode 100644 index 000000000..8f4206d48 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp16.D1.data @@ -0,0 +1,123 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 30, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp16"}, "weight": 0.0, "name": "fp16", "tag": ["fp16", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 92.786, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 102.723, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627447.902508, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712627447.9139285}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 220.05713437477098, "units": "Tflops", "t": 1712627449.889489}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [882.5625, 81920.0], "load": 0, "temperature": 67, "power": 103.781}}, "t": 1712627449.2306383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 0.02, "temperature": 70, "power": 100.978}}, "t": 1712627449.7464473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.8675267073518, "units": "Tflops", "t": 1712627450.1521115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.69697048530335, "units": "Tflops", "t": 1712627450.4132617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.21717850251193, "units": "Tflops", "t": 1712627450.6748936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.0035529033504, "units": "Tflops", "t": 1712627450.936733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.31000831758118, "units": "Tflops", "t": 1712627451.1972284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 71, "power": 277.491}}, "t": 1712627450.9959357}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.508555168623, "units": "Tflops", "t": 1712627451.460009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.25315628379084, "units": "Tflops", "t": 1712627451.7237058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 72, "power": 267.597}}, "t": 1712627451.5247147}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.7343954948587, "units": "Tflops", "t": 1712627451.985916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.38261264640707, "units": "Tflops", "t": 1712627452.2463403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.33366394349528, "units": "Tflops", "t": 1712627452.5068111}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6136790607981, "units": "Tflops", "t": 1712627452.7659757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6089933607264, "units": "Tflops", "t": 1712627453.025141}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 72, "power": 271.805}}, "t": 1712627452.891936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60782196265578, "units": "Tflops", "t": 1712627453.2844365}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.63312655925856, "units": "Tflops", "t": 1712627453.5435934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 72, "power": 281.002}}, "t": 1712627453.4217355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6064162991988, "units": "Tflops", "t": 1712627453.802824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.63828181597952, "units": "Tflops", "t": 1712627454.0619524}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.62234806061153, "units": "Tflops", "t": 1712627454.3211026}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61883353008002, "units": "Tflops", "t": 1712627454.5802622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60571347329068, "units": "Tflops", "t": 1712627454.8394306}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 281.29}}, "t": 1712627454.728093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58627016177087, "units": "Tflops", "t": 1712627455.098672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.59142352135476, "units": "Tflops", "t": 1712627455.3580165}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 281.21}}, "t": 1712627455.2592635}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61508480445082, "units": "Tflops", "t": 1712627455.6172733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60055953519102, "units": "Tflops", "t": 1712627455.8764393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58673864038488, "units": "Tflops", "t": 1712627456.1356308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6089933607264, "units": "Tflops", "t": 1712627456.394788}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6221137555576, "units": "Tflops", "t": 1712627456.6539402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 281.199}}, "t": 1712627456.5778186}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6089933607264, "units": "Tflops", "t": 1712627456.9131486}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60805624140764, "units": "Tflops", "t": 1712627457.172305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 281.679}}, "t": 1712627457.1085567}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61157047442325, "units": "Tflops", "t": 1712627457.431507}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.54505079439735, "units": "Tflops", "t": 1712627457.6907458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 281.674}}, "t": 1712627457.6472142}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.55910135185613, "units": "Tflops", "t": 1712627457.950182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.59704560611993, "units": "Tflops", "t": 1712627458.209355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60618202346498, "units": "Tflops", "t": 1712627458.4685342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 282.26}}, "t": 1712627458.4631667}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60711912898716, "units": "Tflops", "t": 1712627458.7277668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5909550254988, "units": "Tflops", "t": 1712627458.9869437}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6021994019526, "units": "Tflops", "t": 1712627459.2461133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 281.79}}, "t": 1712627458.991112}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58908105931732, "units": "Tflops", "t": 1712627459.5053484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6195364284246, "units": "Tflops", "t": 1712627459.7645068}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5923605182394, "units": "Tflops", "t": 1712627460.02369}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61414764029098, "units": "Tflops", "t": 1712627460.2828438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5916577699293, "units": "Tflops", "t": 1712627460.5420685}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 282.058}}, "t": 1712627460.312414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.62726856635717, "units": "Tflops", "t": 1712627460.8012643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60993048694363, "units": "Tflops", "t": 1712627461.060423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 282.091}}, "t": 1712627460.8403914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61227333266768, "units": "Tflops", "t": 1712627461.3196275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.62094223675626, "units": "Tflops", "t": 1712627461.5787838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 282.073}}, "t": 1712627461.377145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.630549009175, "units": "Tflops", "t": 1712627461.837969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6326579099074, "units": "Tflops", "t": 1712627462.0971162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.63546983189093, "units": "Tflops", "t": 1712627462.3562522}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 73, "power": 281.673}}, "t": 1712627462.2018456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6075876843351, "units": "Tflops", "t": 1712627462.6154783}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5977483841744, "units": "Tflops", "t": 1712627462.8746507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 282.552}}, "t": 1712627462.7307997}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58556744708267, "units": "Tflops", "t": 1712627463.1340683}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61836493333965, "units": "Tflops", "t": 1712627463.3932295}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60290220846, "units": "Tflops", "t": 1712627463.6523943}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5986854276157, "units": "Tflops", "t": 1712627463.9115613}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60618202346498, "units": "Tflops", "t": 1712627464.17072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.143}}, "t": 1712627464.045179}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6082905205906, "units": "Tflops", "t": 1712627464.4299326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6211765396541, "units": "Tflops", "t": 1712627464.6890776}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 282.949}}, "t": 1712627464.5786808}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6040735612614, "units": "Tflops", "t": 1712627464.948289}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6176620414631, "units": "Tflops", "t": 1712627465.2074409}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58790984446307, "units": "Tflops", "t": 1712627465.4666271}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61461622150853, "units": "Tflops", "t": 1712627465.7257776}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6040735612614, "units": "Tflops", "t": 1712627465.9849362}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.148}}, "t": 1712627465.9141905}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.55512020309428, "units": "Tflops", "t": 1712627466.244205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.53943100570092, "units": "Tflops", "t": 1712627466.5036292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.428}}, "t": 1712627466.4470477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.57291924602137, "units": "Tflops", "t": 1712627466.7629116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5808827816302, "units": "Tflops", "t": 1712627467.022101}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.57245081826554, "units": "Tflops", "t": 1712627467.2813036}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5780720651129, "units": "Tflops", "t": 1712627467.5405004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.59306327042904, "units": "Tflops", "t": 1712627467.7996767}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58837832911163, "units": "Tflops", "t": 1712627468.0588555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.727}}, "t": 1712627467.8135283}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.57830628911884, "units": "Tflops", "t": 1712627468.318113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5694060798714, "units": "Tflops", "t": 1712627468.5773084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.54}}, "t": 1712627468.336395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58697288033844, "units": "Tflops", "t": 1712627468.8365347}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.59681134763062, "units": "Tflops", "t": 1712627469.095721}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.444}}, "t": 1712627468.8572187}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5975141243918, "units": "Tflops", "t": 1712627469.3549795}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58954954827638, "units": "Tflops", "t": 1712627469.6141708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5722166050341, "units": "Tflops", "t": 1712627469.873367}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.239}}, "t": 1712627469.6837168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.5649562086111, "units": "Tflops", "t": 1712627470.1327276}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.58181970092812, "units": "Tflops", "t": 1712627470.3919158}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.428}}, "t": 1712627470.2138658}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.57127975641788, "units": "Tflops", "t": 1712627470.6511757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60946192297268, "units": "Tflops", "t": 1712627470.9103377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.61461622150853, "units": "Tflops", "t": 1712627471.169497}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60055953519102, "units": "Tflops", "t": 1712627471.428677}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6218794509349, "units": "Tflops", "t": 1712627471.6878319}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 75, "power": 283.841}}, "t": 1712627471.5249045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60383928983887, "units": "Tflops", "t": 1712627471.9470518}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60337074828718, "units": "Tflops", "t": 1712627472.206215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1801.25, 81920.0], "load": 1.0, "temperature": 75, "power": 283.253}}, "t": 1712627472.0552235}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60571347329068, "units": "Tflops", "t": 1712627472.4654207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.60266793919305, "units": "Tflops", "t": 1712627472.7245922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6108676200593, "units": "Tflops", "t": 1712627472.9837441}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "30", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp16"], "time": 1712627474.1846704, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp32.D0.data new file mode 100644 index 000000000..56d7bbf72 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp32.D0.data @@ -0,0 +1,253 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 63, "power": 89.428, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 65, "power": 98.107, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627517.203767, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712627520.350044}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 18.833351343416947, "units": "Tflops", "t": 1712627523.2292132}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 60, "power": 88.081}}, "t": 1712627521.728529}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 0.12, "temperature": 63, "power": 278.471}}, "t": 1712627522.2427547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.14867713131206, "units": "Tflops", "t": 1712627524.3783422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 281.56}}, "t": 1712627523.2903793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 292.004}}, "t": 1712627523.8151133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.150315162594165, "units": "Tflops", "t": 1712627525.5271902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 279.931}}, "t": 1712627525.0165277}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.150851957105267, "units": "Tflops", "t": 1712627526.6758187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 290.336}}, "t": 1712627525.5425644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 282.652}}, "t": 1712627526.078329}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.151241649010036, "units": "Tflops", "t": 1712627527.8242443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 281.011}}, "t": 1712627526.7721376}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 283.028}}, "t": 1712627527.2858179}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 281.106}}, "t": 1712627527.8170455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.151543869977363, "units": "Tflops", "t": 1712627528.9726663}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 282.15}}, "t": 1712627528.5271683}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.148863979943762, "units": "Tflops", "t": 1712627530.1213443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 294.528}}, "t": 1712627529.050655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.1465266468188, "units": "Tflops", "t": 1712627531.2700195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 293.068}}, "t": 1712627530.2483919}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 291.906}}, "t": 1712627530.7685215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.14499655921804, "units": "Tflops", "t": 1712627532.4189308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 283.026}}, "t": 1712627531.2843647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 292.294}}, "t": 1712627532.010252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.16245406817423, "units": "Tflops", "t": 1712627533.5666668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 296.368}}, "t": 1712627532.5336027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.17549734500818, "units": "Tflops", "t": 1712627534.7135947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 285.672}}, "t": 1712627533.732062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 300.179}}, "t": 1712627534.2486024}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.177267562418262, "units": "Tflops", "t": 1712627535.8606107}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 294.255}}, "t": 1712627534.7708952}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 289.961}}, "t": 1712627535.4937837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.17679706734453, "units": "Tflops", "t": 1712627537.007523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 297.253}}, "t": 1712627536.0173151}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 292.673}}, "t": 1712627536.5445855}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.1767930801968, "units": "Tflops", "t": 1712627538.1543984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 296.088}}, "t": 1712627537.249103}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 295.894}}, "t": 1712627537.7761345}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.17772213043314, "units": "Tflops", "t": 1712627539.3013496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 297.436}}, "t": 1712627538.9958205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.179341205608416, "units": "Tflops", "t": 1712627540.4480736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 302.584}}, "t": 1712627539.5137455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 288.112}}, "t": 1712627540.0323536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.17806107674923, "units": "Tflops", "t": 1712627541.594902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 303.576}}, "t": 1712627540.7439084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 288.208}}, "t": 1712627541.268035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.174229687434572, "units": "Tflops", "t": 1712627542.7419639}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 302.011}}, "t": 1712627541.7965672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 299.774}}, "t": 1712627542.5125742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.95222259293011, "units": "Tflops", "t": 1712627543.9026546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 292.489}}, "t": 1712627543.0386493}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 299.965}}, "t": 1712627543.5662794}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.178862632762524, "units": "Tflops", "t": 1712627545.0494719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 0.93, "temperature": 71, "power": 297.136}}, "t": 1712627544.2636428}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 298.901}}, "t": 1712627544.784342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.17951668830285, "units": "Tflops", "t": 1712627546.1961877}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 298.884}}, "t": 1712627545.3084896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 302.617}}, "t": 1712627546.012285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.615053945462847, "units": "Tflops", "t": 1712627547.377694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 291.414}}, "t": 1712627546.527558}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 302.512}}, "t": 1712627547.0450995}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.179564547776828, "units": "Tflops", "t": 1712627548.5246804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 298.807}}, "t": 1712627547.7404394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 295.1}}, "t": 1712627548.2599282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.180904710028727, "units": "Tflops", "t": 1712627549.6713138}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 299.87}}, "t": 1712627548.792118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 292.76}}, "t": 1712627549.5086508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.202215510540945, "units": "Tflops", "t": 1712627550.8796072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 291.616}}, "t": 1712627550.0288138}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 292.378}}, "t": 1712627550.5585685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.17891846503056, "units": "Tflops", "t": 1712627552.0264575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 0.82, "temperature": 72, "power": 293.655}}, "t": 1712627551.2443533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 305.904}}, "t": 1712627551.7708285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.181754374169866, "units": "Tflops", "t": 1712627553.1730368}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 290.628}}, "t": 1712627552.2949123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 292.872}}, "t": 1712627552.9939914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.30273813010191, "units": "Tflops", "t": 1712627554.374908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 306.019}}, "t": 1712627553.520669}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 293.062}}, "t": 1712627554.04387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.180190731233786, "units": "Tflops", "t": 1712627555.521673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 297.248}}, "t": 1712627554.741062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 302.685}}, "t": 1712627555.265066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.136901102448473, "units": "Tflops", "t": 1712627556.6709626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 296.383}}, "t": 1712627556.4736013}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.503954600543512, "units": "Tflops", "t": 1712627557.8594935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 300.653}}, "t": 1712627556.9982994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 299.49}}, "t": 1712627557.5249217}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.162294822036095, "units": "Tflops", "t": 1712627559.0073175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 302.02}}, "t": 1712627558.2252057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 295.39}}, "t": 1712627558.7491243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.15865676885197, "units": "Tflops", "t": 1712627560.1554134}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 307.448}}, "t": 1712627559.2761912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 306.479}}, "t": 1712627559.9872684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.34847697044663, "units": "Tflops", "t": 1712627561.35409}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 293.144}}, "t": 1712627560.515112}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.144881315770437, "units": "Tflops", "t": 1712627562.5029223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 306.766}}, "t": 1712627561.7039037}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 302.573}}, "t": 1712627562.2259758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.149714783645344, "units": "Tflops", "t": 1712627563.6514225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 298.501}}, "t": 1712627562.8030052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 289.768}}, "t": 1712627563.5384812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 17.822183475977397, "units": "Tflops", "t": 1712627564.88545}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.006}}, "t": 1712627564.0641522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.155350284649476, "units": "Tflops", "t": 1712627566.0335765}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 306.683}}, "t": 1712627565.2301025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 307.071}}, "t": 1712627565.7560873}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.128883899336675, "units": "Tflops", "t": 1712627567.183353}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 285.841}}, "t": 1712627566.2916903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 303.575}}, "t": 1712627566.9980328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.36436886074778, "units": "Tflops", "t": 1712627568.3811924}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.536}}, "t": 1712627567.5207798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 305.912}}, "t": 1712627568.0526266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.14886795548618, "units": "Tflops", "t": 1712627569.529827}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 0.83, "temperature": 74, "power": 305.696}}, "t": 1712627568.741739}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.2}}, "t": 1712627569.267227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.095703933611773, "units": "Tflops", "t": 1712627570.681626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 293.941}}, "t": 1712627570.4809506}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.46833355563609, "units": "Tflops", "t": 1712627571.8724453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 308.528}}, "t": 1712627571.0048544}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 308.308}}, "t": 1712627571.5386744}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.125758217398566, "units": "Tflops", "t": 1712627573.0224504}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 308.221}}, "t": 1712627572.241084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.533}}, "t": 1712627572.7731702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.123200519610027, "units": "Tflops", "t": 1712627574.172564}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 305.792}}, "t": 1712627573.9933333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.3642372286885, "units": "Tflops", "t": 1712627575.3701618}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 282.44}}, "t": 1712627574.51868}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.125845468769135, "units": "Tflops", "t": 1712627576.5203142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 307.624}}, "t": 1712627575.725878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 288.668}}, "t": 1712627576.2547936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.09458120594874, "units": "Tflops", "t": 1712627577.672122}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 304.525}}, "t": 1712627576.7823062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.043}}, "t": 1712627577.5057206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.263575027048276, "units": "Tflops", "t": 1712627578.8763578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.98}}, "t": 1712627578.0307016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.082946614294183, "units": "Tflops", "t": 1712627580.0288787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 289.054}}, "t": 1712627579.2384906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 286.923}}, "t": 1712627579.7606938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.132014571636425, "units": "Tflops", "t": 1712627581.1784296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 304.142}}, "t": 1712627580.2885869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 297.35}}, "t": 1712627581.011358}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.071344421767296, "units": "Tflops", "t": 1712627582.3954427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 304.924}}, "t": 1712627581.5344594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 291.989}}, "t": 1712627582.0599587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.123505821216586, "units": "Tflops", "t": 1712627583.5455847}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.602}}, "t": 1712627582.75856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 307.926}}, "t": 1712627583.2871082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.108027029951295, "units": "Tflops", "t": 1712627584.6965814}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 293.176}}, "t": 1712627584.4974163}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.490433269438583, "units": "Tflops", "t": 1712627585.8859937}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 303.861}}, "t": 1712627585.024028}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 298.221}}, "t": 1712627585.5487688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.111847845853976, "units": "Tflops", "t": 1712627587.0371048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 0.95, "temperature": 74, "power": 305.307}}, "t": 1712627586.2556913}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 298.9}}, "t": 1712627586.7757187}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.105232664828602, "units": "Tflops", "t": 1712627588.1882808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 283.056}}, "t": 1712627587.2987688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 305.41}}, "t": 1712627588.023866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.191969183685867, "units": "Tflops", "t": 1712627589.3972535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 293.728}}, "t": 1712627588.5526567}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.117857337691888, "units": "Tflops", "t": 1712627590.5476704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 306.384}}, "t": 1712627589.7546375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 291.8}}, "t": 1712627590.267761}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.1058263000879, "units": "Tflops", "t": 1712627591.6988008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 293.846}}, "t": 1712627590.7911496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 301.213}}, "t": 1712627591.529179}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.31426963572649, "units": "Tflops", "t": 1712627592.8996797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 297.558}}, "t": 1712627592.0511272}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 301.525}}, "t": 1712627592.5790431}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.137643628944247, "units": "Tflops", "t": 1712627594.0490077}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.917}}, "t": 1712627593.2611158}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.784}}, "t": 1712627593.7793987}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.122356034774157, "units": "Tflops", "t": 1712627595.1991532}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 307.447}}, "t": 1712627594.2962358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.28}}, "t": 1712627595.0076675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.513016971865433, "units": "Tflops", "t": 1712627596.3873343}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 292.575}}, "t": 1712627595.52111}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.328}}, "t": 1712627596.0413415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.124921437864813, "units": "Tflops", "t": 1712627597.537412}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 0.82, "temperature": 74, "power": 302.399}}, "t": 1712627596.7536135}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 303.666}}, "t": 1712627597.27354}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.12359701709376, "units": "Tflops", "t": 1712627598.6874788}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 304.255}}, "t": 1712627597.7965045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 287.024}}, "t": 1712627598.5058315}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.277729905275468, "units": "Tflops", "t": 1712627599.8907866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 302.118}}, "t": 1712627599.029187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 298.22}}, "t": 1712627599.5651417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.127729499345243, "units": "Tflops", "t": 1712627601.0406842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 307.478}}, "t": 1712627600.2590837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 291.006}}, "t": 1712627600.7858412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.128550656167594, "units": "Tflops", "t": 1712627602.1904697}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 308.892}}, "t": 1712627601.9993286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.49645514656145, "units": "Tflops", "t": 1712627603.3794825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.49}}, "t": 1712627602.5156233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 308.612}}, "t": 1712627603.0292244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.108114119617763, "units": "Tflops", "t": 1712627604.530568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.969}}, "t": 1712627603.743156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 308.319}}, "t": 1712627604.269966}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.11233100081286, "units": "Tflops", "t": 1712627605.6813087}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 310.063}}, "t": 1712627605.4739573}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.676646116748277, "units": "Tflops", "t": 1712627606.8588529}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.322}}, "t": 1712627606.0069153}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.116236740067546, "units": "Tflops", "t": 1712627608.009394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.133}}, "t": 1712627607.2183015}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 297.549}}, "t": 1712627607.7456872}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.102625064240215, "units": "Tflops", "t": 1712627609.160724}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 305.895}}, "t": 1712627608.2762856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 298.015}}, "t": 1712627608.9967968}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.309841389639217, "units": "Tflops", "t": 1712627610.3620634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 306.177}}, "t": 1712627609.5203998}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.121852551139046, "units": "Tflops", "t": 1712627611.5122519}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 292.476}}, "t": 1712627610.7246811}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 297.624}}, "t": 1712627611.2513685}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.127138468747532, "units": "Tflops", "t": 1712627612.6621022}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 306.67}}, "t": 1712627611.7778006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 291.423}}, "t": 1712627612.5023022}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.206318711335143, "units": "Tflops", "t": 1712627613.8701227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 307.921}}, "t": 1712627613.0268667}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.126436419460642, "units": "Tflops", "t": 1712627615.0200393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 290.634}}, "t": 1712627614.220976}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 296.091}}, "t": 1712627614.7471955}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.06142154660731, "units": "Tflops", "t": 1712627616.17385}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 305.713}}, "t": 1712627615.277155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.724}}, "t": 1712627615.9948392}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.47246513416236, "units": "Tflops", "t": 1712627617.3644853}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 298.778}}, "t": 1712627616.520194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.11767109367359, "units": "Tflops", "t": 1712627618.5149438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 306.494}}, "t": 1712627617.71792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.819}}, "t": 1712627618.2466807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.069961869584702, "units": "Tflops", "t": 1712627619.668236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 309.497}}, "t": 1712627618.7771955}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.01}}, "t": 1712627619.4926379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.48868380405066, "units": "Tflops", "t": 1712627620.8577874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.022}}, "t": 1712627620.0285459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.113055779056715, "units": "Tflops", "t": 1712627622.0084713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 304.737}}, "t": 1712627621.2169566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 301.037}}, "t": 1712627621.7351077}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.10272397388311, "units": "Tflops", "t": 1712627623.1599271}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 297.15}}, "t": 1712627622.2592232}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.921}}, "t": 1712627622.9800432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.455008226393332, "units": "Tflops", "t": 1712627624.3516624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.223}}, "t": 1712627623.5052793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.13187170496159, "units": "Tflops", "t": 1712627625.5012984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 295.597}}, "t": 1712627624.7018378}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 297.439}}, "t": 1712627625.2304938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.12727333157361, "units": "Tflops", "t": 1712627626.6511393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 294.625}}, "t": 1712627625.7692475}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 302.003}}, "t": 1712627626.4898288}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712627627.4697547, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp32.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp32.D1.data new file mode 100644 index 000000000..e08d8a9e5 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/fp32.D1.data @@ -0,0 +1,256 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32"}, "weight": 0.0, "name": "fp32", "tag": ["fp32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 61, "power": 87.666, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 63, "power": 96.178, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627520.338433, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712627520.3506315}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 17.86100664944628, "units": "Tflops", "t": 1712627523.2678583}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [882.5625, 81920.0], "load": 0, "temperature": 62, "power": 94.198}}, "t": 1712627521.6709208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 0.07, "temperature": 65, "power": 277.943}}, "t": 1712627522.1867242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.180067086674637, "units": "Tflops", "t": 1712627524.4151587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 0.75, "temperature": 68, "power": 291.639}}, "t": 1712627523.2899318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 295.741}}, "t": 1712627523.814364}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.17914977360403, "units": "Tflops", "t": 1712627525.5630326}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 0.99, "temperature": 70, "power": 296.402}}, "t": 1712627525.0168886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 291.942}}, "t": 1712627525.5456522}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.49493818362845, "units": "Tflops", "t": 1712627526.7523043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 304.694}}, "t": 1712627526.079492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.16600196214452, "units": "Tflops", "t": 1712627527.9005744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 292.227}}, "t": 1712627526.7738767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 307.592}}, "t": 1712627527.2885165}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 293.791}}, "t": 1712627527.816184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.15304318606479, "units": "Tflops", "t": 1712627529.0491517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 294.649}}, "t": 1712627528.5262609}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.404788517804363, "units": "Tflops", "t": 1712627530.2444608}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 302.546}}, "t": 1712627529.0503635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 296.609}}, "t": 1712627529.5715768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.134328515170807, "units": "Tflops", "t": 1712627531.394221}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 298.378}}, "t": 1712627530.2593668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 296.501}}, "t": 1712627530.771377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 301.467}}, "t": 1712627531.2880487}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.118110952402894, "units": "Tflops", "t": 1712627532.5447915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 299.336}}, "t": 1712627532.0107028}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 297.374}}, "t": 1712627532.5343647}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.6530920390827, "units": "Tflops", "t": 1712627533.7239096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.076540807227524, "units": "Tflops", "t": 1712627534.8769133}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 283.04}}, "t": 1712627533.7267303}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 301.86}}, "t": 1712627534.244838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 304.979}}, "t": 1712627534.7661853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.095798818055986, "units": "Tflops", "t": 1712627536.0288856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 297.675}}, "t": 1712627535.493333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 309.942}}, "t": 1712627536.0164034}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.390443569765075, "units": "Tflops", "t": 1712627537.2248168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 293.185}}, "t": 1712627536.5470765}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.977565559987976, "units": "Tflops", "t": 1712627538.3840187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 72, "power": 285.477}}, "t": 1712627537.2495458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 299.446}}, "t": 1712627537.7767434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 294.453}}, "t": 1712627538.298938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.98195158790138, "units": "Tflops", "t": 1712627539.5428357}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 291.726}}, "t": 1712627539.0018287}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 286.496}}, "t": 1712627539.5195665}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.551993241768756, "units": "Tflops", "t": 1712627540.7283714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 299.033}}, "t": 1712627540.039246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.990761009903657, "units": "Tflops", "t": 1712627541.8864841}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 295.741}}, "t": 1712627540.7411318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 298.178}}, "t": 1712627541.2672734}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 287.442}}, "t": 1712627541.799035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.981748450126045, "units": "Tflops", "t": 1712627543.0454247}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 301.181}}, "t": 1712627542.5112205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 73, "power": 295.243}}, "t": 1712627543.038349}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.416585073992383, "units": "Tflops", "t": 1712627544.239649}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 289.49}}, "t": 1712627543.5580606}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.987719389285818, "units": "Tflops", "t": 1712627545.3982031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 288.226}}, "t": 1712627544.2651722}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 298.257}}, "t": 1712627544.7854009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.982951714189934, "units": "Tflops", "t": 1712627546.55683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 288.904}}, "t": 1712627546.0064664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 293.509}}, "t": 1712627546.5239089}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.84515322711463, "units": "Tflops", "t": 1712627547.723896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 300.992}}, "t": 1712627547.04252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.984940559659105, "units": "Tflops", "t": 1712627548.8825696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 289.286}}, "t": 1712627547.7418077}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 300.291}}, "t": 1712627548.2609885}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 290.371}}, "t": 1712627548.7942815}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.982014092706976, "units": "Tflops", "t": 1712627550.0412872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 74, "power": 299.717}}, "t": 1712627549.5091028}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 292.215}}, "t": 1712627550.030344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.67108084426962, "units": "Tflops", "t": 1712627551.2194178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 301.848}}, "t": 1712627550.5601628}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.982666511186675, "units": "Tflops", "t": 1712627552.3780591}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 301.277}}, "t": 1712627551.2432919}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 290.969}}, "t": 1712627551.769012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 301.181}}, "t": 1712627552.2931416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.982869668612302, "units": "Tflops", "t": 1712627553.5366876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 293.877}}, "t": 1712627552.9991398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 293.78}}, "t": 1712627553.52096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.670944778611112, "units": "Tflops", "t": 1712627554.7146392}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 304.867}}, "t": 1712627554.0518985}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.990936968929, "units": "Tflops", "t": 1712627555.8730175}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 303.42}}, "t": 1712627554.7413552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 302.039}}, "t": 1712627555.2658274}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.98256884089527, "units": "Tflops", "t": 1712627557.0316505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 75, "power": 292.617}}, "t": 1712627556.4786637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 293.877}}, "t": 1712627556.9987497}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.7960192212872, "units": "Tflops", "t": 1712627558.2017584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 301.468}}, "t": 1712627557.522751}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.970305536203224, "units": "Tflops", "t": 1712627559.3611069}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 299.335}}, "t": 1712627558.2245955}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 301.77}}, "t": 1712627558.7477577}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 302.818}}, "t": 1712627559.2692864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.981428126314068, "units": "Tflops", "t": 1712627560.5199654}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 293.193}}, "t": 1712627559.9869757}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 300.215}}, "t": 1712627560.514664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.731002014074747, "units": "Tflops", "t": 1712627561.6941404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.974656980213037, "units": "Tflops", "t": 1712627562.8531668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 292.996}}, "t": 1712627561.7036002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 301.772}}, "t": 1712627562.2252195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 278.075}}, "t": 1712627562.7960722}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.967773641420457, "units": "Tflops", "t": 1712627564.0127935}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 293.692}}, "t": 1712627563.5380232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.20106967544434, "units": "Tflops", "t": 1712627565.221128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 301.346}}, "t": 1712627564.0662415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.975507989131497, "units": "Tflops", "t": 1712627566.380315}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 292.902}}, "t": 1712627565.2302568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 300.897}}, "t": 1712627565.7566953}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 278.956}}, "t": 1712627566.2904675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.937571848390508, "units": "Tflops", "t": 1712627567.5417109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 305.447}}, "t": 1712627567.0003035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.637}}, "t": 1712627567.5224476}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.68068225762175, "units": "Tflops", "t": 1712627568.7190447}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 305.083}}, "t": 1712627568.0440314}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.949750028413774, "units": "Tflops", "t": 1712627569.8796763}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 295.233}}, "t": 1712627568.741295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.137}}, "t": 1712627569.2666163}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.935651226999955, "units": "Tflops", "t": 1712627571.0414903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 298.542}}, "t": 1712627570.479591}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 300.102}}, "t": 1712627571.00395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.698667480946852, "units": "Tflops", "t": 1712627572.2177818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.699}}, "t": 1712627571.530727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.939481192198258, "units": "Tflops", "t": 1712627573.3790352}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.537}}, "t": 1712627572.2406197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.237}}, "t": 1712627572.77272}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.923782348667707, "units": "Tflops", "t": 1712627574.5412426}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.785}}, "t": 1712627573.9928908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.168}}, "t": 1712627574.518069}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.72195303899213, "units": "Tflops", "t": 1712627575.7159767}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.950042029992527, "units": "Tflops", "t": 1712627576.8764873}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 307.309}}, "t": 1712627575.725118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.956}}, "t": 1712627576.2541752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 286.867}}, "t": 1712627576.784028}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.939053403501532, "units": "Tflops", "t": 1712627578.0378354}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.512}}, "t": 1712627577.505266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 288.222}}, "t": 1712627578.0304062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.499897980587374, "units": "Tflops", "t": 1712627579.2268937}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.935989445823402, "units": "Tflops", "t": 1712627580.3882718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 288.122}}, "t": 1712627579.2386348}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.543}}, "t": 1712627579.762533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 278.866}}, "t": 1712627580.2849667}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.939395632912884, "units": "Tflops", "t": 1712627581.5495565}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 297.192}}, "t": 1712627581.010751}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.543}}, "t": 1712627581.534163}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.563613156698864, "units": "Tflops", "t": 1712627582.7343116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 302.357}}, "t": 1712627582.0637138}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.931049541715442, "units": "Tflops", "t": 1712627583.8960989}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 208.665}}, "t": 1712627582.7576487}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.504}}, "t": 1712627583.2865016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.95334032594562, "units": "Tflops", "t": 1712627585.0565314}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.221}}, "t": 1712627584.4972703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.524}}, "t": 1712627585.023578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.678514553697653, "units": "Tflops", "t": 1712627586.2341552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 308.366}}, "t": 1712627585.5466812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.952207015701106, "units": "Tflops", "t": 1712627587.3945947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 76, "power": 301.945}}, "t": 1712627586.25691}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 293.593}}, "t": 1712627586.7773936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 300.789}}, "t": 1712627587.3009403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.936004996404655, "units": "Tflops", "t": 1712627588.5561018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.829}}, "t": 1712627588.0243099}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 291.346}}, "t": 1712627588.5529554}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.55219474866094, "units": "Tflops", "t": 1712627589.7415853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.944927464281278, "units": "Tflops", "t": 1712627590.902421}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 303.615}}, "t": 1712627589.7449543}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.411}}, "t": 1712627590.2600842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 286.052}}, "t": 1712627590.7843578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.95734499528146, "units": "Tflops", "t": 1712627592.0626163}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 303.219}}, "t": 1712627591.5287223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 307.973}}, "t": 1712627592.0508308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.712316758066738, "units": "Tflops", "t": 1712627593.2379572}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.439}}, "t": 1712627592.5785832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.94116531184463, "units": "Tflops", "t": 1712627594.3990755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 298.341}}, "t": 1712627593.2557857}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.66}}, "t": 1712627593.775298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 291.447}}, "t": 1712627594.291688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.948749504584608, "units": "Tflops", "t": 1712627595.5599654}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.107}}, "t": 1712627595.0113285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 304.98}}, "t": 1712627595.5268705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.76561312505308, "units": "Tflops", "t": 1712627596.7319705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 296.607}}, "t": 1712627596.051426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.937898470543086, "units": "Tflops", "t": 1712627597.89328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 282.756}}, "t": 1712627596.7486646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 307.689}}, "t": 1712627597.2683527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 289.495}}, "t": 1712627597.786602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.931274910011364, "units": "Tflops", "t": 1712627599.0551002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 299.433}}, "t": 1712627598.5070398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.347}}, "t": 1712627599.0299494}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.72629013281217, "units": "Tflops", "t": 1712627600.2295988}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 304.585}}, "t": 1712627599.5668726}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.941320903829443, "units": "Tflops", "t": 1712627601.3907275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 308.37}}, "t": 1712627600.2587888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.454}}, "t": 1712627600.7852318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 281.206}}, "t": 1712627601.3060696}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.941484278164644, "units": "Tflops", "t": 1712627602.5519297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 308.195}}, "t": 1712627602.007896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 297.691}}, "t": 1712627602.5257967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.801559585584876, "units": "Tflops", "t": 1712627603.7216969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 298.927}}, "t": 1712627603.0475545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.909039843417357, "units": "Tflops", "t": 1712627604.8849237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 305.08}}, "t": 1712627603.7436087}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 290.952}}, "t": 1712627604.27057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.927751230171655, "units": "Tflops", "t": 1712627606.046894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 293.79}}, "t": 1712627605.4711866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 288.609}}, "t": 1712627606.0063152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.942332314041828, "units": "Tflops", "t": 1712627607.2079988}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.932149234359134, "units": "Tflops", "t": 1712627608.3696032}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 288.227}}, "t": 1712627607.218154}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 303.6}}, "t": 1712627607.7449229}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 305.177}}, "t": 1712627608.274435}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.928093051239802, "units": "Tflops", "t": 1712627609.5316248}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.138}}, "t": 1712627608.997089}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 305.748}}, "t": 1712627609.5214796}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.596434211249587, "units": "Tflops", "t": 1712627610.7142832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.940916369985754, "units": "Tflops", "t": 1712627611.8753595}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 157.292}}, "t": 1712627610.7244136}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.605}}, "t": 1712627611.2482624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 291.348}}, "t": 1712627611.7727964}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.95763333443936, "units": "Tflops", "t": 1712627613.0355377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.236}}, "t": 1712627612.5025983}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 303.998}}, "t": 1712627613.0276246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.686957436776336, "units": "Tflops", "t": 1712627614.2124903}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.952288796439042, "units": "Tflops", "t": 1712627615.3728802}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.643}}, "t": 1712627614.2211208}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.442}}, "t": 1712627614.7478004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 286.191}}, "t": 1712627615.2783356}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.94735984717572, "units": "Tflops", "t": 1712627616.533688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 295.522}}, "t": 1712627615.9952881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.469}}, "t": 1712627616.5207987}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.716353140701354, "units": "Tflops", "t": 1712627617.7090228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.958030789277903, "units": "Tflops", "t": 1712627618.869053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 293.894}}, "t": 1712627617.7162156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 298.066}}, "t": 1712627618.2462296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 283.351}}, "t": 1712627618.7710052}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.950980389320357, "units": "Tflops", "t": 1712627620.0296707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.087}}, "t": 1712627619.4930956}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 300.979}}, "t": 1712627620.0288384}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.798926930613042, "units": "Tflops", "t": 1712627621.199601}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.94564738454575, "units": "Tflops", "t": 1712627622.3604128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 280.515}}, "t": 1712627621.2143836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.073}}, "t": 1712627621.733902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 282.193}}, "t": 1712627622.25608}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.942655210046485, "units": "Tflops", "t": 1712627623.5215414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 298.922}}, "t": 1712627622.9795845}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 294.853}}, "t": 1712627623.5048218}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.76791566083021, "units": "Tflops", "t": 1712627624.6934042}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.94872225444085, "units": "Tflops", "t": 1712627625.8540266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 305.371}}, "t": 1712627624.7016928}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 303.887}}, "t": 1712627625.2300408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 281.508}}, "t": 1712627625.7701864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 18.937890693694236, "units": "Tflops", "t": 1712627627.0154161}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 306.909}}, "t": 1712627626.4884615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 77, "power": 299.819}}, "t": 1712627627.0038033}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32"], "time": 1712627628.108826, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/llama.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/llama.D0.data new file mode 100644 index 000000000..de6bfcc82 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/llama.D0.data @@ -0,0 +1,457 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 33, "power": 52.237, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 33, "power": 53.179, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627121.200254, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712627123.8831167}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Pipeline", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Starting", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =10.984142780303955, total / elapsed =190.8205348312123 in_token_count =9 out_token_count =2087", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 190.8205348312123, "units": "Tok/s", "t": 1712627198.4517581}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27227.25, 81920.0], "load": 0, "temperature": 33, "power": 70.326}}, "t": 1712627187.5594869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27249.25, 81920.0], "load": 0, "temperature": 33, "power": 70.033}}, "t": 1712627188.073008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27279.25, 81920.0], "load": 0.16, "temperature": 35, "power": 235.114}}, "t": 1712627188.5910473}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27325.25, 81920.0], "load": 0.95, "temperature": 36, "power": 233.544}}, "t": 1712627189.113233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27391.25, 81920.0], "load": 0.96, "temperature": 36, "power": 240.084}}, "t": 1712627189.6308637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27551.25, 81920.0], "load": 0.96, "temperature": 37, "power": 213.604}}, "t": 1712627190.1483607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27611.25, 81920.0], "load": 0.96, "temperature": 37, "power": 235.425}}, "t": 1712627190.670242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27671.25, 81920.0], "load": 0.96, "temperature": 37, "power": 238.396}}, "t": 1712627191.1892416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27711.25, 81920.0], "load": 0.96, "temperature": 37, "power": 242.794}}, "t": 1712627191.7114475}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27751.25, 81920.0], "load": 0.96, "temperature": 38, "power": 232.883}}, "t": 1712627192.2280073}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27771.25, 81920.0], "load": 0, "temperature": 35, "power": 71.115}}, "t": 1712627193.0492992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27811.25, 81920.0], "load": 0.96, "temperature": 38, "power": 240.047}}, "t": 1712627193.5706663}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27911.25, 81920.0], "load": 0.96, "temperature": 38, "power": 240.204}}, "t": 1712627194.0993495}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27951.25, 81920.0], "load": 0.97, "temperature": 39, "power": 236.095}}, "t": 1712627194.8084514}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28031.25, 81920.0], "load": 0.96, "temperature": 39, "power": 233.376}}, "t": 1712627195.332859}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28031.25, 81920.0], "load": 0.96, "temperature": 39, "power": 236.408}}, "t": 1712627195.8610482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28091.25, 81920.0], "load": 0.61, "temperature": 37, "power": 71.821}}, "t": 1712627196.5601969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28251.25, 81920.0], "load": 0.97, "temperature": 39, "power": 243.873}}, "t": 1712627197.0873473}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28251.25, 81920.0], "load": 0.96, "temperature": 40, "power": 242.892}}, "t": 1712627197.6938565}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.318662881851196, total / elapsed =373.5901379988219 in_token_count =185 out_token_count =1802\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 373.5901379988219, "units": "Tok/s", "t": 1712627203.7704322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 41, "power": 303.507}}, "t": 1712627198.5914445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 41, "power": 242.421}}, "t": 1712627199.126338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 41, "power": 244.501}}, "t": 1712627200.5270858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 41, "power": 243.832}}, "t": 1712627201.0596309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 42, "power": 239.999}}, "t": 1712627202.4702334}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 42, "power": 239.896}}, "t": 1712627202.993646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 42, "power": 233.532}}, "t": 1712627203.5317025}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.775692939758301, total / elapsed =304.47070408028117 in_token_count =121 out_token_count =1942\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 304.47070408028117, "units": "Tok/s", "t": 1712627210.5461445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.95, "temperature": 43, "power": 242.901}}, "t": 1712627204.4311566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 43, "power": 239.548}}, "t": 1712627204.9514678}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 43, "power": 242.131}}, "t": 1712627205.4773293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 44, "power": 244.464}}, "t": 1712627206.3952322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 44, "power": 245.095}}, "t": 1712627206.9132748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 44, "power": 245.662}}, "t": 1712627207.4492033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 44, "power": 238.638}}, "t": 1712627208.357319}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 45, "power": 242.453}}, "t": 1712627208.8876019}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 45, "power": 242.038}}, "t": 1712627209.425708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 45, "power": 247.026}}, "t": 1712627210.3505049}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.634033203125, total / elapsed =311.72590439038754 in_token_count =127 out_token_count =1941\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 311.72590439038754, "units": "Tok/s", "t": 1712627217.180198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 46, "power": 247.578}}, "t": 1712627210.8858013}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 45, "power": 245.557}}, "t": 1712627212.2994363}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 46, "power": 241.658}}, "t": 1712627212.8308244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 46, "power": 245.355}}, "t": 1712627213.373888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 46, "power": 241.125}}, "t": 1712627214.2803266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 47, "power": 242.652}}, "t": 1712627214.801529}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 47, "power": 241.766}}, "t": 1712627215.327192}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 47, "power": 246.592}}, "t": 1712627216.2320771}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 47, "power": 245.817}}, "t": 1712627216.7599757}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.373608827590942, total / elapsed =216.24542236428567 in_token_count =6 out_token_count =2021\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 216.24542236428567, "units": "Tok/s", "t": 1712627226.5538306}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 48, "power": 249.21}}, "t": 1712627217.2992706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 48, "power": 246.872}}, "t": 1712627218.199526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 48, "power": 238.182}}, "t": 1712627218.735803}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 48, "power": 246.036}}, "t": 1712627220.131409}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 49, "power": 246.291}}, "t": 1712627220.656478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 49, "power": 247.561}}, "t": 1712627221.1842337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 49, "power": 247.41}}, "t": 1712627222.108674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 49, "power": 249.031}}, "t": 1712627222.6428437}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 50, "power": 250.029}}, "t": 1712627224.0416713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 50, "power": 249.893}}, "t": 1712627224.5656812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 50, "power": 243.409}}, "t": 1712627225.0993228}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 50, "power": 250.905}}, "t": 1712627226.0289423}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.674933910369873, total / elapsed =601.3713590233161 in_token_count =256 out_token_count =1954\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 601.3713590233161, "units": "Tok/s", "t": 1712627230.22878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 50, "power": 247.876}}, "t": 1712627226.5607846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28307.25, 81920.0], "load": 0.97, "temperature": 51, "power": 249.648}}, "t": 1712627227.9500887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28307.25, 81920.0], "load": 0.96, "temperature": 51, "power": 250.959}}, "t": 1712627228.4829729}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28307.25, 81920.0], "load": 0.96, "temperature": 51, "power": 244.2}}, "t": 1712627229.011301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28307.25, 81920.0], "load": 0.97, "temperature": 51, "power": 253.144}}, "t": 1712627229.9113736}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.7509753704071045, total / elapsed =1296.991401696295 in_token_count =340 out_token_count =1931\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1296.991401696295, "units": "Tok/s", "t": 1712627231.9797707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 54, "power": 309.224}}, "t": 1712627230.4302735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 52, "power": 253.851}}, "t": 1712627230.96395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 52, "power": 250.603}}, "t": 1712627231.8740215}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.352792024612427, total / elapsed =297.9820444622857 in_token_count =95 out_token_count =2096\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 297.9820444622857, "units": "Tok/s", "t": 1712627239.3325796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 52, "power": 250.795}}, "t": 1712627232.408795}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 53, "power": 243.994}}, "t": 1712627233.808296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 53, "power": 251.078}}, "t": 1712627234.3468804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 53, "power": 252.396}}, "t": 1712627235.742521}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 53, "power": 254.9}}, "t": 1712627236.2794878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 54, "power": 250.388}}, "t": 1712627237.6681178}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 54, "power": 246.137}}, "t": 1712627238.1912625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 54, "power": 255.223}}, "t": 1712627238.7245781}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.402382612228394, total / elapsed =206.11796817112167 in_token_count =5 out_token_count =1933\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 206.11796817112167, "units": "Tok/s", "t": 1712627248.7349823}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.94, "temperature": 54, "power": 255.626}}, "t": 1712627239.6454113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 54, "power": 252.375}}, "t": 1712627240.1754851}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 54, "power": 249.892}}, "t": 1712627241.581275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 55, "power": 250.941}}, "t": 1712627242.1197262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 55, "power": 254.765}}, "t": 1712627243.5203583}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 55, "power": 246.269}}, "t": 1712627244.0544202}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 55, "power": 252.088}}, "t": 1712627245.4554446}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 55, "power": 252.499}}, "t": 1712627245.9919329}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 56, "power": 256.978}}, "t": 1712627247.3975387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 56, "power": 257.146}}, "t": 1712627247.9342651}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.7472434043884277, total / elapsed =543.3327329672857 in_token_count =253 out_token_count =1783\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 543.3327329672857, "units": "Tok/s", "t": 1712627252.482245}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 56, "power": 247.799}}, "t": 1712627249.3130393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 56, "power": 254.245}}, "t": 1712627249.8340454}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 56, "power": 256.309}}, "t": 1712627250.3770585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 247.857}}, "t": 1712627251.2767434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 57, "power": 258.499}}, "t": 1712627251.8020983}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 256.988}}, "t": 1712627252.3253372}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.1127376556396484, total / elapsed =665.009464016211 in_token_count =282 out_token_count =1788\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 665.009464016211, "units": "Tok/s", "t": 1712627255.5949981}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 251.12}}, "t": 1712627253.1960156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 254.267}}, "t": 1712627253.7195308}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 254.019}}, "t": 1712627254.2555306}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 255.72}}, "t": 1712627255.16233}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.6761136054992676, total / elapsed =545.140932805266 in_token_count =256 out_token_count =1748\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 545.140932805266, "units": "Tok/s", "t": 1712627259.2711363}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 252.609}}, "t": 1712627255.682035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 58, "power": 256.404}}, "t": 1712627256.2141047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 58, "power": 256.122}}, "t": 1712627257.125295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 58, "power": 255.148}}, "t": 1712627257.6549182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 58, "power": 251.045}}, "t": 1712627259.0575001}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.399715185165405, total / elapsed =209.26165966223232 in_token_count =5 out_token_count =1962\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 209.26165966223232, "units": "Tok/s", "t": 1712627268.6708684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.95, "temperature": 58, "power": 259.551}}, "t": 1712627259.5939643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 58, "power": 259.459}}, "t": 1712627260.1319911}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 58, "power": 256.177}}, "t": 1712627261.0364761}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 59, "power": 260.101}}, "t": 1712627261.5769205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 59, "power": 260.622}}, "t": 1712627262.9675303}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 59, "power": 250.472}}, "t": 1712627263.494872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 59, "power": 254.792}}, "t": 1712627264.9078496}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 59, "power": 258.338}}, "t": 1712627265.4415643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 59, "power": 255.737}}, "t": 1712627266.8338394}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 261.0}}, "t": 1712627267.3624766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 261.197}}, "t": 1712627267.9012272}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.5425589084625244, total / elapsed =1341.277787609539 in_token_count =349 out_token_count =1720\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1341.277787609539, "units": "Tok/s", "t": 1712627270.2134469}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 261.514}}, "t": 1712627268.7205038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 60, "power": 249.556}}, "t": 1712627269.254943}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =2.988492727279663, total / elapsed =758.9109986104913 in_token_count =287 out_token_count =1981\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 758.9109986104913, "units": "Tok/s", "t": 1712627273.2019556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 61, "power": 256.059}}, "t": 1712627270.6547198}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 60, "power": 260.064}}, "t": 1712627271.1836016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 60, "power": 258.69}}, "t": 1712627271.71774}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 61, "power": 266.669}}, "t": 1712627272.6234975}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 61, "power": 263.256}}, "t": 1712627273.1582875}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.350925922393799, total / elapsed =218.6949203717452 in_token_count =7 out_token_count =2038\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 218.6949203717452, "units": "Tok/s", "t": 1712627282.5529}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.95, "temperature": 61, "power": 265.517}}, "t": 1712627274.560826}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 61, "power": 259.455}}, "t": 1712627275.091414}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 61, "power": 265.136}}, "t": 1712627275.620919}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 61, "power": 262.716}}, "t": 1712627276.5235047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.95, "temperature": 61, "power": 265.216}}, "t": 1712627277.062297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 61, "power": 254.876}}, "t": 1712627278.4669008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 62, "power": 261.227}}, "t": 1712627278.9945743}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 62, "power": 263.723}}, "t": 1712627279.5328202}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 62, "power": 259.055}}, "t": 1712627280.4418285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 62, "power": 259.221}}, "t": 1712627280.971695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 62, "power": 266.732}}, "t": 1712627282.335325}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.2000916004180908, total / elapsed =1829.8603200246982 in_token_count =363 out_token_count =1833\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1829.8603200246982, "units": "Tok/s", "t": 1712627283.7530043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 301.619}}, "t": 1712627282.8532424}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 62, "power": 261.006}}, "t": 1712627283.373297}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.346771717071533, total / elapsed =211.08892564438997 in_token_count =7 out_token_count =1966\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 211.08892564438997, "units": "Tok/s", "t": 1712627293.0997968}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 62, "power": 267.749}}, "t": 1712627284.295784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 62, "power": 261.325}}, "t": 1712627284.8107748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 62, "power": 268.624}}, "t": 1712627285.3469617}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 63, "power": 264.451}}, "t": 1712627286.255276}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 63, "power": 256.787}}, "t": 1712627286.7886276}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 63, "power": 264.541}}, "t": 1712627288.1787505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 63, "power": 266.682}}, "t": 1712627288.697229}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 63, "power": 266.908}}, "t": 1712627289.218516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 63, "power": 268.464}}, "t": 1712627290.1574984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 63, "power": 267.493}}, "t": 1712627290.6908958}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 63, "power": 265.813}}, "t": 1712627292.09975}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 63, "power": 261.808}}, "t": 1712627292.6290097}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.6644444465637207, total / elapsed =1247.263015768381 in_token_count =344 out_token_count =1732\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1247.263015768381, "units": "Tok/s", "t": 1712627294.7642574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 268.847}}, "t": 1712627293.9813225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 266.37}}, "t": 1712627294.520548}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.739387035369873, total / elapsed =305.9625436524336 in_token_count =122 out_token_count =1940\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 305.9625436524336, "units": "Tok/s", "t": 1712627301.5036745}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 262.102}}, "t": 1712627295.921176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 269.228}}, "t": 1712627296.454145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 64, "power": 262.397}}, "t": 1712627297.8664126}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 259.852}}, "t": 1712627298.400627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 64, "power": 269.725}}, "t": 1712627299.7993627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 64, "power": 269.731}}, "t": 1712627300.33289}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.365264892578125, total / elapsed =214.6228668441513 in_token_count =6 out_token_count =2004\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 214.6228668441513, "units": "Tok/s", "t": 1712627310.8689566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 65, "power": 268.165}}, "t": 1712627301.7345097}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 262.535}}, "t": 1712627302.2707016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 268.486}}, "t": 1712627302.812225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 267.688}}, "t": 1712627303.7204194}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 258.958}}, "t": 1712627304.2526274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 267.389}}, "t": 1712627305.6615698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 269.748}}, "t": 1712627306.1944358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 65, "power": 259.658}}, "t": 1712627307.5986092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 262.978}}, "t": 1712627308.134227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 65, "power": 270.521}}, "t": 1712627309.5406272}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 270.23}}, "t": 1712627310.0804653}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.4467082023620605, total / elapsed =257.5634693718252 in_token_count =91 out_token_count =1827\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 257.5634693718252, "units": "Tok/s", "t": 1712627318.3156872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 261.718}}, "t": 1712627311.4593117}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 269.148}}, "t": 1712627311.994183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 66, "power": 267.669}}, "t": 1712627313.3946335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 270.229}}, "t": 1712627313.9311905}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 270.423}}, "t": 1712627315.3293316}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 269.762}}, "t": 1712627315.848662}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 269.805}}, "t": 1712627316.37701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 266.603}}, "t": 1712627317.29116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 66, "power": 272.377}}, "t": 1712627317.8208904}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.8540215492248535, total / elapsed =368.8062952699378 in_token_count =162 out_token_count =1997\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 368.8062952699378, "units": "Tok/s", "t": 1712627324.169726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 269.38}}, "t": 1712627319.229597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 272.161}}, "t": 1712627319.7528045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 275.008}}, "t": 1712627320.2861385}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 274.294}}, "t": 1712627321.2009616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 67, "power": 275.776}}, "t": 1712627321.7388525}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 67, "power": 269.761}}, "t": 1712627323.1363237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 67, "power": 271.875}}, "t": 1712627323.6648724}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.292556047439575, total / elapsed =402.2630995150187 in_token_count =186 out_token_count =1943\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 402.2630995150187, "units": "Tok/s", "t": 1712627329.4622993}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 277.747}}, "t": 1712627324.1893153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 266.714}}, "t": 1712627325.078574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 269.335}}, "t": 1712627325.6118207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 67, "power": 271.111}}, "t": 1712627327.012435}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 269.947}}, "t": 1712627327.5393684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 266.893}}, "t": 1712627328.0692456}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 274.908}}, "t": 1712627328.9689333}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.85827374458313, total / elapsed =294.826376914021 in_token_count =117 out_token_count =1905\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 294.826376914021, "units": "Tok/s", "t": 1712627336.3205924}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 68, "power": 273.079}}, "t": 1712627329.5035186}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 274.719}}, "t": 1712627330.9123456}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 271.433}}, "t": 1712627331.4489484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 274.381}}, "t": 1712627332.869737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 274.147}}, "t": 1712627333.3974185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 274.754}}, "t": 1712627333.9236906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 272.229}}, "t": 1712627334.8374355}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 277.753}}, "t": 1712627335.3719697}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.37232255935669, total / elapsed =214.5679459135083 in_token_count =6 out_token_count =2005\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 214.5679459135083, "units": "Tok/s", "t": 1712627345.6929355}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 278.537}}, "t": 1712627336.772878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 273.256}}, "t": 1712627337.3075752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 276.487}}, "t": 1712627338.7001014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 273.431}}, "t": 1712627339.2336867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 278.149}}, "t": 1712627340.6346474}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 278.623}}, "t": 1712627341.1564894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 272.104}}, "t": 1712627341.6927152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 267.807}}, "t": 1712627342.5993116}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 266.039}}, "t": 1712627343.1345084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 278.149}}, "t": 1712627344.5433278}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 275.912}}, "t": 1712627345.0702705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 276.074}}, "t": 1712627345.6062047}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.447418928146362, total / elapsed =270.96636022089245 in_token_count =91 out_token_count =1927\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 270.96636022089245, "units": "Tok/s", "t": 1712627353.1403728}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 275.014}}, "t": 1712627346.5322847}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 273.513}}, "t": 1712627347.0599}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 278.1}}, "t": 1712627348.4654822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 277.841}}, "t": 1712627349.0013933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 277.746}}, "t": 1712627350.4114268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 277.537}}, "t": 1712627350.9330761}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 278.414}}, "t": 1712627351.4604}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 274.502}}, "t": 1712627352.3873687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 269.024}}, "t": 1712627352.925691}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.298323392868042, total / elapsed =205.62846861903444 in_token_count =9 out_token_count =1903\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 205.62846861903444, "units": "Tok/s", "t": 1712627362.438719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 70, "power": 277.453}}, "t": 1712627354.327347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 274.816}}, "t": 1712627354.85888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 272.028}}, "t": 1712627356.247231}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 273.809}}, "t": 1712627356.768675}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 273.743}}, "t": 1712627357.2935548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 276.765}}, "t": 1712627358.2280474}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 280.274}}, "t": 1712627358.764695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 274.844}}, "t": 1712627360.1595912}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 276.457}}, "t": 1712627360.6892312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 276.068}}, "t": 1712627362.0787144}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.3264646530151367, total / elapsed =566.0664388221388 in_token_count =273 out_token_count =1610\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 566.0664388221388, "units": "Tok/s", "t": 1712627365.7652004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 72, "power": 298.223}}, "t": 1712627362.6089702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 277.347}}, "t": 1712627363.1500478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 274.989}}, "t": 1712627364.059039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 275.694}}, "t": 1712627364.5817046}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 272.377}}, "t": 1712627365.1134045}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.421717882156372, total / elapsed =618.4028236326992 in_token_count =269 out_token_count =1847\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 618.4028236326992, "units": "Tok/s", "t": 1712627369.1869335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 304.36}}, "t": 1712627366.018227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 276.668}}, "t": 1712627366.5391452}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 272.58}}, "t": 1712627367.0770085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 281.232}}, "t": 1712627367.994612}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 279.9}}, "t": 1712627368.5322225}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.701564311981201, total / elapsed =370.7276736719813 in_token_count =213 out_token_count =1530\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 370.7276736719813, "units": "Tok/s", "t": 1712627373.8885167}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 280.484}}, "t": 1712627369.8963945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 281.05}}, "t": 1712627370.4272184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 277.916}}, "t": 1712627371.8355455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 276.573}}, "t": 1712627372.362835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 280.48}}, "t": 1712627373.7746644}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.247115135192871, total / elapsed =215.9592446729439 in_token_count =11 out_token_count =1986\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 215.9592446729439, "units": "Tok/s", "t": 1712627383.135651}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 70, "power": 270.814}}, "t": 1712627374.3069766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 278.625}}, "t": 1712627375.706805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 277.82}}, "t": 1712627376.2316763}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 280.01}}, "t": 1712627376.7686708}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 283.864}}, "t": 1712627377.6794941}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 279.943}}, "t": 1712627378.215801}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 280.342}}, "t": 1712627379.6177475}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 280.402}}, "t": 1712627380.1535113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 279.02}}, "t": 1712627381.557264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 277.355}}, "t": 1712627382.094847}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.167431116104126, total / elapsed =316.014879341069 in_token_count =148 out_token_count =1801\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 316.014879341069, "units": "Tok/s", "t": 1712627389.3030994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.98, "temperature": 70, "power": 248.444}}, "t": 1712627383.4904804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 270.554}}, "t": 1712627384.0267282}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 276.184}}, "t": 1712627385.4131548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 279.212}}, "t": 1712627385.9302573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 280.655}}, "t": 1712627386.4618967}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 278.314}}, "t": 1712627387.3709702}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 279.598}}, "t": 1712627387.9057953}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.020972728729248, total / elapsed =283.0092177782372 in_token_count =110 out_token_count =1877\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 283.0092177782372, "units": "Tok/s", "t": 1712627396.3240905}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.703}}, "t": 1712627389.3075767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 281.254}}, "t": 1712627389.840597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 274.234}}, "t": 1712627391.2416375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 278.606}}, "t": 1712627391.785467}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.973}}, "t": 1712627393.1855962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 281.449}}, "t": 1712627393.7180626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 278.124}}, "t": 1712627395.1290936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 278.256}}, "t": 1712627395.6641042}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.36374306678772, total / elapsed =210.7063367637711 in_token_count =6 out_token_count =1967\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 210.7063367637711, "units": "Tok/s", "t": 1712627405.6878755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 278.71}}, "t": 1712627397.0529358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 278.043}}, "t": 1712627397.5800714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 269.703}}, "t": 1712627398.106153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 279.712}}, "t": 1712627399.0097818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 268.408}}, "t": 1712627399.530024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.48}}, "t": 1712627400.0617874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.192}}, "t": 1712627400.971797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 275.256}}, "t": 1712627401.509089}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 276.639}}, "t": 1712627402.9005144}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 281.54}}, "t": 1712627403.4323704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 281.943}}, "t": 1712627404.8307228}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 273.353}}, "t": 1712627405.3578827}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.207664728164673, total / elapsed =627.2475992686444 in_token_count =278 out_token_count =1734\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 627.2475992686444, "units": "Tok/s", "t": 1712627408.8955548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.94, "temperature": 73, "power": 302.026}}, "t": 1712627405.8958628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.957}}, "t": 1712627406.8040545}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 279.787}}, "t": 1712627407.3317323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 273.024}}, "t": 1712627408.734551}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.377283334732056, total / elapsed =211.78841772266313 in_token_count =6 out_token_count =1980\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 211.78841772266313, "units": "Tok/s", "t": 1712627418.272864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 271.849}}, "t": 1712627409.252704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 275.034}}, "t": 1712627409.7827556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 279.676}}, "t": 1712627410.7006717}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 278.364}}, "t": 1712627411.233991}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 71, "power": 274.09}}, "t": 1712627412.646625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 277.062}}, "t": 1712627413.1875844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.188}}, "t": 1712627414.5829458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.473}}, "t": 1712627415.111993}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.089}}, "t": 1712627415.6488461}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 278.34}}, "t": 1712627416.5606446}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 275.877}}, "t": 1712627417.0894415}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.6657466888427734, total / elapsed =1314.1253797242969 in_token_count =344 out_token_count =1845\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1314.1253797242969, "units": "Tok/s", "t": 1712627419.938627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 73, "power": 319.334}}, "t": 1712627418.4956427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 279.598}}, "t": 1712627419.0361388}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.144115686416626, total / elapsed =294.6480841571918 in_token_count =105 out_token_count =2000\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 294.6480841571918, "units": "Tok/s", "t": 1712627427.0827641}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 281.529}}, "t": 1712627420.429608}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 275.789}}, "t": 1712627420.9607923}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 277.487}}, "t": 1712627422.3711212}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 271.065}}, "t": 1712627422.9089682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 273.952}}, "t": 1712627424.3126347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 282.239}}, "t": 1712627424.8442817}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 282.04}}, "t": 1712627425.3714187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 275.513}}, "t": 1712627426.275985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 273.379}}, "t": 1712627426.808864}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.125789642333984, total / elapsed =213.78971863973604 in_token_count =17 out_token_count =1934\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 213.78971863973604, "units": "Tok/s", "t": 1712627436.208571}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.94, "temperature": 71, "power": 273.257}}, "t": 1712627428.2182488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 277.644}}, "t": 1712627428.7541614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 71, "power": 278.544}}, "t": 1712627429.2938602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 272.899}}, "t": 1712627430.2046425}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.877}}, "t": 1712627430.7361398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 273.732}}, "t": 1712627431.2697484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 268.184}}, "t": 1712627432.1759741}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 275.495}}, "t": 1712627432.716886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 281.739}}, "t": 1712627434.11236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 281.824}}, "t": 1712627434.6336105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 280.198}}, "t": 1712627435.160865}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 274.444}}, "t": 1712627436.0735555}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.628559112548828, total / elapsed =469.04445794243867 in_token_count =216 out_token_count =1955\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 469.04445794243867, "units": "Tok/s", "t": 1712627440.8371482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.99, "temperature": 71, "power": 266.996}}, "t": 1712627436.6080759}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 278.915}}, "t": 1712627438.0226274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 278.527}}, "t": 1712627438.5554886}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 282.412}}, "t": 1712627439.909997}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 281.809}}, "t": 1712627440.42447}, "pipe": "data"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712627441.7174315, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/llama.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/llama.D1.data new file mode 100644 index 000000000..642bb6133 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/llama.D1.data @@ -0,0 +1,449 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "llm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 800, "voir": {"options": {"stop": 30, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "plan": {"method": "per_gpu"}, "tags": ["llm", "nlp"], "weight": 1.0, "name": "llama", "tag": ["llama", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 33, "power": 52.238, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 33, "power": 53.081, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627123.872568, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712627123.883691}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Pipeline\n", "pipe": "stderr"} +{"event": "line", "data": "Starting\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =11.133292436599731, total / elapsed =180.18030258555405 in_token_count =9 out_token_count =1997\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 180.18030258555405, "units": "Tok/s", "t": 1712627198.4432938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27227.25, 81920.0], "load": 0, "temperature": 33, "power": 70.85}}, "t": 1712627187.3963943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27249.25, 81920.0], "load": 0, "temperature": 34, "power": 71.351}}, "t": 1712627187.9081094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27277.25, 81920.0], "load": 0.95, "temperature": 36, "power": 233.74}}, "t": 1712627188.4236422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27325.25, 81920.0], "load": 0.93, "temperature": 37, "power": 234.9}}, "t": 1712627188.9423943}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27371.25, 81920.0], "load": 0.96, "temperature": 37, "power": 232.249}}, "t": 1712627189.4589431}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27571.25, 81920.0], "load": 0.96, "temperature": 38, "power": 227.415}}, "t": 1712627189.977323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27611.25, 81920.0], "load": 0.96, "temperature": 38, "power": 225.219}}, "t": 1712627190.498085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27671.25, 81920.0], "load": 0.97, "temperature": 38, "power": 236.567}}, "t": 1712627191.018785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27711.25, 81920.0], "load": 0.96, "temperature": 39, "power": 239.422}}, "t": 1712627191.5373945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27751.25, 81920.0], "load": 0.96, "temperature": 39, "power": 236.854}}, "t": 1712627192.0564291}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27851.25, 81920.0], "load": 0.88, "temperature": 39, "power": 93.627}}, "t": 1712627193.04707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27911.25, 81920.0], "load": 0.96, "temperature": 39, "power": 235.104}}, "t": 1712627193.5661309}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28011.25, 81920.0], "load": 0.96, "temperature": 40, "power": 236.885}}, "t": 1712627194.0967364}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28031.25, 81920.0], "load": 0, "temperature": 37, "power": 76.619}}, "t": 1712627194.814476}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28031.25, 81920.0], "load": 0.97, "temperature": 40, "power": 234.52}}, "t": 1712627195.33422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28111.25, 81920.0], "load": 0.97, "temperature": 38, "power": 236.403}}, "t": 1712627196.5581481}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28251.25, 81920.0], "load": 0.97, "temperature": 40, "power": 238.607}}, "t": 1712627197.0842957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28251.25, 81920.0], "load": 0.96, "temperature": 41, "power": 238.285}}, "t": 1712627197.6970005}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.289296865463257, total / elapsed =366.7784299775897 in_token_count =185 out_token_count =1755\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 366.7784299775897, "units": "Tok/s", "t": 1712627203.7326164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 41, "power": 242.745}}, "t": 1712627198.5911274}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 42, "power": 232.107}}, "t": 1712627199.125769}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 42, "power": 244.196}}, "t": 1712627200.5274546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 43, "power": 242.904}}, "t": 1712627201.0606368}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 43, "power": 242.778}}, "t": 1712627202.4648876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 43, "power": 240.063}}, "t": 1712627202.9876995}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 44, "power": 239.946}}, "t": 1712627203.5200565}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.743638753890991, total / elapsed =303.1004587576164 in_token_count =121 out_token_count =1923\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 303.1004587576164, "units": "Tok/s", "t": 1712627210.4762807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 44, "power": 241.054}}, "t": 1712627204.4352884}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 44, "power": 243.301}}, "t": 1712627204.9571054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 44, "power": 236.778}}, "t": 1712627205.4863703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 45, "power": 241.326}}, "t": 1712627206.3890486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 45, "power": 243.947}}, "t": 1712627206.907714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 45, "power": 239.953}}, "t": 1712627207.4400759}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 45, "power": 246.806}}, "t": 1712627208.3663735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 46, "power": 245.921}}, "t": 1712627208.8890128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 46, "power": 245.707}}, "t": 1712627209.4285564}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 46, "power": 244.578}}, "t": 1712627210.348331}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.604897499084473, total / elapsed =304.3196355853536 in_token_count =127 out_token_count =1883\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 304.3196355853536, "units": "Tok/s", "t": 1712627217.0812047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.98, "temperature": 47, "power": 243.857}}, "t": 1712627210.8848298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 47, "power": 244.37}}, "t": 1712627211.4053812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 47, "power": 237.594}}, "t": 1712627212.3105776}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 47, "power": 243.494}}, "t": 1712627212.8342016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 47, "power": 246.611}}, "t": 1712627213.3755145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 48, "power": 245.324}}, "t": 1712627214.2718096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 48, "power": 244.866}}, "t": 1712627214.7956018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 48, "power": 248.178}}, "t": 1712627215.3293488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 48, "power": 245.746}}, "t": 1712627216.2341528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 49, "power": 238.974}}, "t": 1712627216.7605412}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.333073377609253, total / elapsed =213.648807774699 in_token_count =6 out_token_count =1988\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 213.648807774699, "units": "Tok/s", "t": 1712627226.4143102}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.94, "temperature": 49, "power": 239.858}}, "t": 1712627217.295806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 49, "power": 249.494}}, "t": 1712627218.2010992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 49, "power": 240.224}}, "t": 1712627218.7363877}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 49, "power": 237.808}}, "t": 1712627220.1405003}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 50, "power": 250.279}}, "t": 1712627220.666441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 50, "power": 242.77}}, "t": 1712627221.1997793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 50, "power": 247.754}}, "t": 1712627222.1090858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 50, "power": 247.449}}, "t": 1712627222.643661}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 51, "power": 250.464}}, "t": 1712627224.0326095}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 51, "power": 249.22}}, "t": 1712627224.5505924}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.96, "temperature": 51, "power": 254.658}}, "t": 1712627225.0817213}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28275.25, 81920.0], "load": 0.97, "temperature": 51, "power": 250.961}}, "t": 1712627226.028573}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.6564154624938965, total / elapsed =598.1267781064282 in_token_count =256 out_token_count =1931\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 598.1267781064282, "units": "Tok/s", "t": 1712627230.070747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28307.25, 81920.0], "load": 0.97, "temperature": 54, "power": 309.275}}, "t": 1712627226.5604737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28307.25, 81920.0], "load": 0.96, "temperature": 52, "power": 246.593}}, "t": 1712627227.9529328}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28307.25, 81920.0], "load": 0.97, "temperature": 52, "power": 251.315}}, "t": 1712627228.4833748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28307.25, 81920.0], "load": 0.96, "temperature": 53, "power": 255.109}}, "t": 1712627229.9077115}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.7479701042175293, total / elapsed =1298.0771207272492 in_token_count =340 out_token_count =1929\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1298.0771207272492, "units": "Tok/s", "t": 1712627231.8187382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 1.0, "temperature": 56, "power": 295.424}}, "t": 1712627230.4278622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 53, "power": 256.278}}, "t": 1712627230.961704}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.3194193840026855, total / elapsed =293.1926546920231 in_token_count =95 out_token_count =2051\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 293.1926546920231, "units": "Tok/s", "t": 1712627239.1381776}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 53, "power": 308.28}}, "t": 1712627231.8737035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 53, "power": 251.059}}, "t": 1712627232.4083843}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 54, "power": 253.8}}, "t": 1712627233.8079102}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 54, "power": 252.971}}, "t": 1712627234.3460486}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 54, "power": 257.812}}, "t": 1712627235.74212}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 54, "power": 258.232}}, "t": 1712627236.279131}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 55, "power": 248.344}}, "t": 1712627237.6753862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 55, "power": 256.454}}, "t": 1712627238.1951656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 55, "power": 257.639}}, "t": 1712627238.7331712}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.357847690582275, total / elapsed =211.2665289465707 in_token_count =5 out_token_count =1972\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 211.2665289465707, "units": "Tok/s", "t": 1712627248.4960551}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 55, "power": 252.549}}, "t": 1712627239.644872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 55, "power": 256.262}}, "t": 1712627240.1746895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 56, "power": 250.658}}, "t": 1712627241.580896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 56, "power": 256.87}}, "t": 1712627242.1191297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 56, "power": 252.956}}, "t": 1712627243.5199373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 56, "power": 257.558}}, "t": 1712627244.0538251}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 256.489}}, "t": 1712627245.4558742}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 57, "power": 257.063}}, "t": 1712627245.9923024}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 57, "power": 255.855}}, "t": 1712627247.3961527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 57, "power": 255.599}}, "t": 1712627247.9334788}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.7319085597991943, total / elapsed =549.0488223833255 in_token_count =253 out_token_count =1796\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 549.0488223833255, "units": "Tok/s", "t": 1712627252.2279792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 1.0, "temperature": 58, "power": 253.461}}, "t": 1712627249.3154273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 58, "power": 256.678}}, "t": 1712627249.8360624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 58, "power": 259.085}}, "t": 1712627250.3754077}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 58, "power": 257.785}}, "t": 1712627251.2843819}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 58, "power": 257.578}}, "t": 1712627251.807726}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.1006340980529785, total / elapsed =682.7635680486632 in_token_count =282 out_token_count =1835\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 682.7635680486632, "units": "Tok/s", "t": 1712627255.3286319}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 60, "power": 303.136}}, "t": 1712627252.3357608}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 59, "power": 257.618}}, "t": 1712627253.1943977}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 59, "power": 264.293}}, "t": 1712627253.7173343}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 59, "power": 265.659}}, "t": 1712627254.257167}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 59, "power": 258.077}}, "t": 1712627255.1562228}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.660116195678711, total / elapsed =566.3754616444888 in_token_count =256 out_token_count =1817\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 566.3754616444888, "units": "Tok/s", "t": 1712627258.988771}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 1.0, "temperature": 59, "power": 242.654}}, "t": 1712627255.6800184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 59, "power": 264.162}}, "t": 1712627256.2107723}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 59, "power": 264.39}}, "t": 1712627257.1268997}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 59, "power": 266.742}}, "t": 1712627257.6557357}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.35697340965271, total / elapsed =207.43887099194413 in_token_count =5 out_token_count =1936\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 207.43887099194413, "units": "Tok/s", "t": 1712627268.3457751}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 260.496}}, "t": 1712627259.0576618}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 265.609}}, "t": 1712627259.5943687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 265.326}}, "t": 1712627260.1268036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 60, "power": 260.131}}, "t": 1712627261.0360928}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 264.187}}, "t": 1712627261.5761147}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 60, "power": 264.504}}, "t": 1712627262.9679575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.97, "temperature": 60, "power": 266.009}}, "t": 1712627263.4978275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 61, "power": 268.039}}, "t": 1712627264.9074416}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 61, "power": 266.188}}, "t": 1712627265.4405923}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 61, "power": 262.411}}, "t": 1712627266.828696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 61, "power": 256.191}}, "t": 1712627267.3620627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28349.25, 81920.0], "load": 0.96, "temperature": 61, "power": 268.926}}, "t": 1712627267.8981688}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.5644705295562744, total / elapsed =1341.66797031027 in_token_count =349 out_token_count =1750\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1341.66797031027, "units": "Tok/s", "t": 1712627269.9102626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 61, "power": 292.681}}, "t": 1712627268.7195396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 62, "power": 265.388}}, "t": 1712627269.2537262}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =2.9817631244659424, total / elapsed =781.4168673835624 in_token_count =287 out_token_count =2043\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 781.4168673835624, "units": "Tok/s", "t": 1712627272.892041}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 62, "power": 295.186}}, "t": 1712627270.653644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 62, "power": 269.6}}, "t": 1712627271.1826077}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 62, "power": 267.287}}, "t": 1712627271.7132096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 62, "power": 266.884}}, "t": 1712627272.62412}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.312315225601196, total / elapsed =217.02444032864295 in_token_count =7 out_token_count =2014\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 217.02444032864295, "units": "Tok/s", "t": 1712627282.204383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.95, "temperature": 62, "power": 269.055}}, "t": 1712627273.158884}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.95, "temperature": 62, "power": 266.468}}, "t": 1712627274.5515063}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 62, "power": 270.902}}, "t": 1712627275.081526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 63, "power": 265.934}}, "t": 1712627275.6111226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 63, "power": 270.306}}, "t": 1712627276.5238962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 63, "power": 267.864}}, "t": 1712627277.0631037}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 63, "power": 270.984}}, "t": 1712627278.4582083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 63, "power": 263.45}}, "t": 1712627278.9883225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 63, "power": 265.197}}, "t": 1712627279.5230267}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.96, "temperature": 63, "power": 261.043}}, "t": 1712627280.4424365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28393.25, 81920.0], "load": 0.97, "temperature": 64, "power": 261.384}}, "t": 1712627280.9753268}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.199481725692749, total / elapsed =1826.6222428145866 in_token_count =363 out_token_count =1828\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1826.6222428145866, "units": "Tok/s", "t": 1712627283.4038837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 64, "power": 269.126}}, "t": 1712627282.349237}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 64, "power": 276.512}}, "t": 1712627282.8659673}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 64, "power": 273.712}}, "t": 1712627283.3867722}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.307106018066406, total / elapsed =214.35234498537176 in_token_count =7 out_token_count =1988\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 214.35234498537176, "units": "Tok/s", "t": 1712627292.7110102}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 64, "power": 274.097}}, "t": 1712627284.2910051}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 274.414}}, "t": 1712627284.807588}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 265.811}}, "t": 1712627285.3460302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 269.921}}, "t": 1712627286.2561173}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 64, "power": 275.003}}, "t": 1712627286.7896035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 268.172}}, "t": 1712627288.1984792}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 276.553}}, "t": 1712627288.7190747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 65, "power": 274.008}}, "t": 1712627289.2366269}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 65, "power": 274.981}}, "t": 1712627290.1571295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 65, "power": 277.816}}, "t": 1712627290.6900637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 65, "power": 274.8}}, "t": 1712627292.100338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 65, "power": 269.692}}, "t": 1712627292.630412}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.6553568840026855, total / elapsed =1220.884764808651 in_token_count =344 out_token_count =1677\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1220.884764808651, "units": "Tok/s", "t": 1712627294.36639}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 66, "power": 279.524}}, "t": 1712627293.979481}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.712304592132568, total / elapsed =306.15416386327985 in_token_count =122 out_token_count =1933\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 306.15416386327985, "units": "Tok/s", "t": 1712627301.0787148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 68, "power": 300.678}}, "t": 1712627294.51936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 271.866}}, "t": 1712627295.9215503}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 66, "power": 278.416}}, "t": 1712627296.454988}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 66, "power": 272.996}}, "t": 1712627297.8668013}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 276.127}}, "t": 1712627298.4012187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 275.236}}, "t": 1712627299.7997377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 66, "power": 268.238}}, "t": 1712627300.333716}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.329623699188232, total / elapsed =219.1942639849381 in_token_count =6 out_token_count =2039\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 219.1942639849381, "units": "Tok/s", "t": 1712627310.4083612}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 276.94}}, "t": 1712627301.7400668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 67, "power": 273.34}}, "t": 1712627302.2716978}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 269.753}}, "t": 1712627302.8096454}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 277.273}}, "t": 1712627303.7200575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 275.46}}, "t": 1712627304.2516243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 67, "power": 269.262}}, "t": 1712627305.6611714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 276.494}}, "t": 1712627306.1932247}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 67, "power": 279.51}}, "t": 1712627307.5983725}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 67, "power": 280.752}}, "t": 1712627308.1332417}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 279.594}}, "t": 1712627309.5402324}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 68, "power": 277.058}}, "t": 1712627310.0800362}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.412826299667358, total / elapsed =267.5093034473214 in_token_count =91 out_token_count =1892\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 267.5093034473214, "units": "Tok/s", "t": 1712627317.821213}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 279.026}}, "t": 1712627311.4588988}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 68, "power": 273.652}}, "t": 1712627311.99338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 68, "power": 273.996}}, "t": 1712627313.3942735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 68, "power": 277.74}}, "t": 1712627313.930838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 68, "power": 279.026}}, "t": 1712627315.3233595}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 68, "power": 279.788}}, "t": 1712627315.8420393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 279.208}}, "t": 1712627316.3780344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 279.014}}, "t": 1712627317.2915342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 278.704}}, "t": 1712627317.8213253}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.828005790710449, total / elapsed =358.7848185279688 in_token_count =162 out_token_count =1929\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 358.7848185279688, "units": "Tok/s", "t": 1712627323.649238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 276.4}}, "t": 1712627319.223018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 281.353}}, "t": 1712627319.7461584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 281.857}}, "t": 1712627320.2716837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 281.862}}, "t": 1712627321.2005591}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 69, "power": 285.742}}, "t": 1712627321.7378619}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 277.452}}, "t": 1712627323.1367137}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.265485763549805, total / elapsed =407.93957033736126 in_token_count =186 out_token_count =1962\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 407.93957033736126, "units": "Tok/s", "t": 1712627328.9147449}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 69, "power": 281.751}}, "t": 1712627323.6658182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 275.409}}, "t": 1712627324.1930602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 274.985}}, "t": 1712627325.0789526}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 280.373}}, "t": 1712627325.61302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 277.047}}, "t": 1712627327.0105956}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 282.336}}, "t": 1712627327.5341768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 277.947}}, "t": 1712627328.0643747}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.832199335098267, total / elapsed =293.60970042176734 in_token_count =117 out_token_count =1889\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 293.60970042176734, "units": "Tok/s", "t": 1712627335.7469637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 70, "power": 284.196}}, "t": 1712627328.9692357}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 275.608}}, "t": 1712627329.5041144}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 284.882}}, "t": 1712627330.911915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 70, "power": 279.719}}, "t": 1712627331.4479327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 282.644}}, "t": 1712627332.8590405}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 283.597}}, "t": 1712627333.3867714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 70, "power": 282.857}}, "t": 1712627333.9210372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 282.075}}, "t": 1712627334.83704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 280.388}}, "t": 1712627335.3699894}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.327662467956543, total / elapsed =211.62858398675033 in_token_count =6 out_token_count =1968\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 211.62858398675033, "units": "Tok/s", "t": 1712627345.0746527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 280.795}}, "t": 1712627336.7732785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 275.814}}, "t": 1712627337.3085501}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 276.198}}, "t": 1712627338.700491}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 286.643}}, "t": 1712627339.2345011}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 281.745}}, "t": 1712627340.6246612}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 287.226}}, "t": 1712627341.1468196}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 274.844}}, "t": 1712627341.6829484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 277.666}}, "t": 1712627342.600089}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 278.762}}, "t": 1712627343.1355226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 276.455}}, "t": 1712627344.5404882}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 71, "power": 286.608}}, "t": 1712627345.0680563}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.418476104736328, total / elapsed =266.76637790024114 in_token_count =91 out_token_count =1888\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 266.76637790024114, "units": "Tok/s", "t": 1712627352.4931533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 71, "power": 291.249}}, "t": 1712627345.607415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 72, "power": 284.58}}, "t": 1712627346.5289085}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 72, "power": 278.346}}, "t": 1712627347.059327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 289.374}}, "t": 1712627348.4652848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 72, "power": 291.522}}, "t": 1712627349.000431}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 280.631}}, "t": 1712627350.4185123}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 278.641}}, "t": 1712627350.9381814}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 282.604}}, "t": 1712627351.4650714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 289.176}}, "t": 1712627352.3870013}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.257383108139038, total / elapsed =215.50366628405035 in_token_count =9 out_token_count =1986\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 215.50366628405035, "units": "Tok/s", "t": 1712627361.7505574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.94, "temperature": 72, "power": 286.647}}, "t": 1712627352.9253263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 72, "power": 285.102}}, "t": 1712627354.3279622}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 282.836}}, "t": 1712627354.8599615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 289.486}}, "t": 1712627356.2661207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 72, "power": 291.218}}, "t": 1712627356.7855737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 72, "power": 282.037}}, "t": 1712627357.3106875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 285.387}}, "t": 1712627358.2276938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 287.83}}, "t": 1712627358.763903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 290.346}}, "t": 1712627360.161319}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 290.82}}, "t": 1712627360.694221}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.316619396209717, total / elapsed =579.5057467843585 in_token_count =273 out_token_count =1649\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 579.5057467843585, "units": "Tok/s", "t": 1712627365.0671947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 287.964}}, "t": 1712627362.079147}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 281.201}}, "t": 1712627362.6098082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 282.127}}, "t": 1712627363.1522315}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 282.994}}, "t": 1712627364.0574172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 291.5}}, "t": 1712627364.5776348}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.413362979888916, total / elapsed =623.1391189662521 in_token_count =269 out_token_count =1858\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 623.1391189662521, "units": "Tok/s", "t": 1712627368.4805775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 291.633}}, "t": 1712627365.1004004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 290.857}}, "t": 1712627366.0230432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 288.709}}, "t": 1712627366.5438921}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 290.048}}, "t": 1712627367.0695739}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 282.363}}, "t": 1712627367.994829}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.68228006362915, total / elapsed =389.34023066254304 in_token_count =213 out_token_count =1610\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 389.34023066254304, "units": "Tok/s", "t": 1712627373.1628747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 291.909}}, "t": 1712627368.5328832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 284.884}}, "t": 1712627369.8968234}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 285.5}}, "t": 1712627370.4286046}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 286.195}}, "t": 1712627371.8359573}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 287.906}}, "t": 1712627372.374411}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.210904359817505, total / elapsed =218.54531556984742 in_token_count =11 out_token_count =2002\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 218.54531556984742, "units": "Tok/s", "t": 1712627382.3738008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 330.445}}, "t": 1712627373.7742841}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 289.459}}, "t": 1712627374.304525}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 293.475}}, "t": 1712627375.7103767}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 289.287}}, "t": 1712627376.2344644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 294.422}}, "t": 1712627376.7713125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 282.274}}, "t": 1712627377.6798837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 290.069}}, "t": 1712627378.2167983}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 287.133}}, "t": 1712627379.6173437}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 295.42}}, "t": 1712627380.15268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 293.873}}, "t": 1712627381.5576804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 73, "power": 293.445}}, "t": 1712627382.0952427}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.141863107681274, total / elapsed =308.37548261720184 in_token_count =148 out_token_count =1746\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 308.37548261720184, "units": "Tok/s", "t": 1712627388.5156858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 287.523}}, "t": 1712627383.490893}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 291.597}}, "t": 1712627384.0261266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 73, "power": 292.676}}, "t": 1712627385.4063013}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 291.716}}, "t": 1712627385.9272735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 286.323}}, "t": 1712627386.4584165}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 284.398}}, "t": 1712627387.3705757}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 294.221}}, "t": 1712627387.9036531}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.992175817489624, total / elapsed =290.89657541395457 in_token_count =110 out_token_count =1924\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 290.89657541395457, "units": "Tok/s", "t": 1712627395.5078862}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 292.595}}, "t": 1712627389.3079758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 291.238}}, "t": 1712627389.8418212}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 294.255}}, "t": 1712627391.2420151}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 286.449}}, "t": 1712627391.7858546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 290.341}}, "t": 1712627393.185976}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 289.07}}, "t": 1712627393.7188501}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 294.136}}, "t": 1712627395.1295016}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.324912548065186, total / elapsed =218.76880769496086 in_token_count =6 out_token_count =2034\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 218.76880769496086, "units": "Tok/s", "t": 1712627404.832817}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 290.713}}, "t": 1712627395.6645644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 287.889}}, "t": 1712627397.0513253}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 285.485}}, "t": 1712627397.579048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 293.469}}, "t": 1712627398.9920406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 284.679}}, "t": 1712627399.5118825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 292.013}}, "t": 1712627400.0391448}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 295.705}}, "t": 1712627400.9714277}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 283.097}}, "t": 1712627401.508124}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 287.998}}, "t": 1712627402.900111}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 285.268}}, "t": 1712627403.4315426}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.1993672847747803, total / elapsed =638.8763208672641 in_token_count =278 out_token_count =1766\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 638.8763208672641, "units": "Tok/s", "t": 1712627408.0322053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 293.645}}, "t": 1712627404.8433628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 1.0, "temperature": 74, "power": 279.726}}, "t": 1712627405.370501}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 290.145}}, "t": 1712627405.9030106}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 292.097}}, "t": 1712627406.8044217}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 288.973}}, "t": 1712627407.340396}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.32949161529541, total / elapsed =220.05486307894535 in_token_count =6 out_token_count =2047\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 220.05486307894535, "units": "Tok/s", "t": 1712627417.3617172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 296.076}}, "t": 1712627408.72651}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 280.605}}, "t": 1712627409.249491}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 288.945}}, "t": 1712627409.7904286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 293.082}}, "t": 1712627410.7016451}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 293.079}}, "t": 1712627411.2348166}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 289.614}}, "t": 1712627412.6462245}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 291.888}}, "t": 1712627413.187166}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 293.665}}, "t": 1712627414.583744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 295.795}}, "t": 1712627415.1128058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 295.339}}, "t": 1712627415.6458068}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 295.102}}, "t": 1712627416.561043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 294.055}}, "t": 1712627417.0908616}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.6613223552703857, total / elapsed =1322.4405203663384 in_token_count =344 out_token_count =1853\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1322.4405203663384, "units": "Tok/s", "t": 1712627419.0230567}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 289.669}}, "t": 1712627418.4948254}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.114933729171753, total / elapsed =284.33153097484956 in_token_count =105 out_token_count =1918\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 284.33153097484956, "units": "Tok/s", "t": 1712627426.138009}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 284.719}}, "t": 1712627419.035054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 293.767}}, "t": 1712627420.4299903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 288.978}}, "t": 1712627420.9619966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 292.597}}, "t": 1712627422.3695502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 285.786}}, "t": 1712627422.907975}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 295.886}}, "t": 1712627424.3130295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 294.548}}, "t": 1712627424.8456922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 293.756}}, "t": 1712627425.3825834}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.086385726928711, total / elapsed =223.52121746063034 in_token_count =17 out_token_count =2014\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 223.52121746063034, "units": "Tok/s", "t": 1712627435.2244184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 74, "power": 290.949}}, "t": 1712627426.277125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 295.419}}, "t": 1712627426.8096766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 285.058}}, "t": 1712627428.218049}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 288.21}}, "t": 1712627428.7525682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.95, "temperature": 74, "power": 289.101}}, "t": 1712627429.2969115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 292.099}}, "t": 1712627430.19746}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 285.106}}, "t": 1712627430.724754}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 293.547}}, "t": 1712627431.2599432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 290.563}}, "t": 1712627432.176357}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 292.711}}, "t": 1712627432.7174654}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 74, "power": 294.737}}, "t": 1712627434.1098185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 287.554}}, "t": 1712627434.6284788}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 283.454}}, "t": 1712627435.1572804}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.612536430358887, total / elapsed =462.00177108068794 in_token_count =216 out_token_count =1915\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 462.00177108068794, "units": "Tok/s", "t": 1712627439.8369725}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.96, "temperature": 75, "power": 292.392}}, "t": 1712627436.0738657}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 293.869}}, "t": 1712627436.6084404}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 294.141}}, "t": 1712627438.0230427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28439.25, 81920.0], "load": 0.97, "temperature": 74, "power": 294.646}}, "t": 1712627438.557584}, "pipe": "data"} +{"event": "end", "data": {"command": ["python", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1712627441.584891, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-1_3b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-1_3b-multinode.data new file mode 100644 index 000000000..3b45f0015 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-1_3b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-1_3b.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-1_3b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-1_3b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-6_7b-multinode.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-6_7b-multinode.data new file mode 100644 index 000000000..cccd5c098 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-6_7b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-6_7b.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-6_7b.data new file mode 100644 index 000000000..3e2c6cea0 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/opt-6_7b.data @@ -0,0 +1 @@ +{"event": "error", "data": {"type": "KeyError", "message": "'port'", "trace": "Traceback (most recent call last):\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/multi.py\", line 202, in do_run\n await exec_plan.execute(\"run\", timeout=True, timeout_delay=600)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 108, in execute\n return await execute_command(self, phase, timeout, timeout_delay, **kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/executors.py\", line 57, in execute_command\n for pack, argv, _kwargs in command.commands():\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 150, in commands\n yield from executor.commands()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 125, in commands\n yield self.pack, self.argv(), self.kwargs()\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 121, in argv\n return self._argv(**kwargs) + self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 304, in argv\n script_args = self.exec.argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 122, in argv\n return self._argv(**kwargs)\n File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/milabench/commands/__init__.py\", line 682, in _argv\n f\"--main_process_port={manager['port']}\",\nKeyError: 'port'\n"}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/reformer.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/reformer.D0.data new file mode 100644 index 000000000..d353dee4b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/reformer.D0.data @@ -0,0 +1,290 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 68, "power": 98.497, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 69, "power": 108.055, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628436.80605, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712628439.8685179}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 5.877542018890381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.849128246307373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 0.22, "temperature": 67, "power": 304.241}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.813958644866943}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7900309562683105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7752156257629395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 68, "power": 320.268}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7631072998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.75518798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.034187632988335, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7507781982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 69, "power": 260.074}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.732280488320534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7458271980285645}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.60442301038741, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7409539222717285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.71517648927361, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.735416412353516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 70, "power": 322.295}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.60553353879554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.73209810256958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.405496693448434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.729013919830322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.65744709317221, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.726690769195557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 71, "power": 315.512}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.684225123550746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.723057270050049}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.532250569271724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.720351219177246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.52257451973401, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.717747688293457}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 71, "power": 296.451}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.530600576052414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.713781833648682}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.53796925510874, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.710906028747559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.50051383633144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.70851469039917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 71, "power": 306.875}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.49413549421762, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.705513000488281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.32221294442779, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.703208923339844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.502861514634176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.700363636016846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 0.97, "temperature": 72, "power": 290.044}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.34616654640276, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.697647571563721}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.376627614913126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.69537353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.46093938553654, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.693230628967285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 72, "power": 317.468}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.426800205290135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.689853191375732}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.318699420243945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.688460826873779}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.3403171262865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.684384346008301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 72, "power": 301.519}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.43860325023937, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.681786060333252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.34661348976726, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.678401470184326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.31524897205577, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.675480842590332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 0.98, "temperature": 73, "power": 292.282}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.44094026670434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.671544551849365}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.27701295984424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.669521808624268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.384230304452565, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.665090560913086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 275.995}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.32783569331363, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.661938190460205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.304577239091536, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.656990051269531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.36811320064036, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6528801918029785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 0.98, "temperature": 73, "power": 300.427}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.35523174711247, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.64725399017334}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.284846346822235, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.642117500305176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.27127094005859, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.637569904327393}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 326.922}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.19922405622755, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.631320476531982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.2800417946829, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6241865158081055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.24105771244338, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.617847919464111}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 291.634}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.257219095577376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.610367298126221}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.2802995882668, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.602100849151611}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.259627252176585, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5928053855896}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 294.125}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.246945876104554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.584080696105957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.38817614133749, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.574090003967285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.32554354636447, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.563380241394043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 323.073}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.282386281617796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.549855709075928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.36735336643794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.538491725921631}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.31092876127296, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.524754524230957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 72, "power": 319.268}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.316732477674755, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.512552738189697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.33751422087651, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.499128818511963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.4060717534999, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.486915111541748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 72, "power": 255.842}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.40096052177067, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.473858833312988}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.42136949929388, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.465662002563477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.40043539444553, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.460432052612305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 71, "power": 324.279}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.40826269345466, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4458770751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.483374719292804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4319634437561035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.50187979360034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.422971248626709}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 70, "power": 322.433}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.498350197508614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.412723541259766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.55203644013603, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.396388530731201}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.537709486450396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.39016580581665}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 70, "power": 316.384}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.50839139563835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.371394157409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.527620535124676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.36153507232666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 70, "power": 295.104}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.568828305259395, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 69, "power": 305.609}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712628516.457621, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/reformer.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/reformer.D1.data new file mode 100644 index 000000000..27ac5aad5 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/reformer.D1.data @@ -0,0 +1,290 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Reformer", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 1.0, "name": "reformer", "tag": ["reformer", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 93.212, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 101.466, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628439.849355, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712628439.876669}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 5.877199649810791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.84792947769165}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 0.17, "temperature": 70, "power": 310.747}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.81281852722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.789834499359131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7744550704956055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 71, "power": 327.483}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.762836456298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.754806995391846}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.46806678073715, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.750513076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 71, "power": 308.286}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.44078893552444, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.745347023010254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.47941699066298, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.739916801452637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.427000326492745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.735625267028809}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 0.98, "temperature": 72, "power": 299.317}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.36040953527279, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.731861114501953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.21407786027338, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.728966236114502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.36810208774123, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.725690841674805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 257.308}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.31145219313737, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.722377300262451}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.31419274772197, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7193403244018555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.32024699525636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.716220855712891}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 0.98, "temperature": 73, "power": 283.623}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.27560161908663, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.713222503662109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.224536948621456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.710509300231934}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.29301190370856, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.708148002624512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 74, "power": 276.878}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.24643439469255, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.704833507537842}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.247511306578005, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.701851844787598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.347941870263654, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.700043201446533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 276.034}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.342550772944456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.698017597198486}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.18991432462573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.695438385009766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.253946230364704, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.692138195037842}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 0.97, "temperature": 74, "power": 300.089}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.13045338718917, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.68941593170166}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.20881139185861, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.686017036437988}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.2198723696015, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.684140682220459}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 74, "power": 321.2}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.18577650557087, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.680732727050781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.17648201220133, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.677685737609863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.129371251347045, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.674522876739502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 0.98, "temperature": 75, "power": 332.37}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.11253094003494, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.670896053314209}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.17647963372942, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.667908668518066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.12235656335772, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.663943767547607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 75, "power": 302.924}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.08478254668411, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6600470542907715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.10829500682011, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.654635906219482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.05736127957588, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.651019096374512}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 75, "power": 331.265}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.04301288939653, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.645768165588379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.040888181602774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.640763282775879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.052282404232336, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.634705066680908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 75, "power": 316.961}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.125192650166674, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.627997398376465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.090416599643945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.621215343475342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.000211639095625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.616032123565674}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 75, "power": 248.004}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 54.98011732540552, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.605618953704834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.040883069323066, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.602279186248779}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.01151592164096, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.589416027069092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 75, "power": 241.894}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.085836808025995, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.580410480499268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.15173469772313, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.569159984588623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.063825603042304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.556602954864502}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 75, "power": 280.188}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.086548063704775, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5447001457214355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.11604143851694, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.533631324768066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.15211320358679, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.520388126373291}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 75, "power": 263.753}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.16227401606561, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.19234969803089, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.499040126800537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 74, "power": 338.105}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.25573205373145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.482672691345215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.2722535417793, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.470543384552002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.223137911490525, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4615797996521}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.21890339711603, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.451319694519043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 73, "power": 259.047}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.22135648135287, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.441404819488525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.29370100292319, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.438401222229004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 72, "power": 320.112}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.37192226590545, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4200592041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.358508530232804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.403636455535889}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.41316877279986, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.3975114822387695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.47147822570698, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.387638092041016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 72, "power": 296.459}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.37335937031375, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.388059616088867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.43224239417883, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.359933376312256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 1.0, "temperature": 71, "power": 321.712}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.495897225170886, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [25417.25, 81920.0], "load": 0.27, "temperature": 68, "power": 101.832}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Reformer", "--batch-size", "64"], "time": 1712628516.87864, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/regnet_y_128gf.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/regnet_y_128gf.D0.data new file mode 100644 index 000000000..38b1065ac --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/regnet_y_128gf.D0.data @@ -0,0 +1,404 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.05, "memory": 0.010771942138671876}, "temperature": 57, "power": 88.18, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 58, "power": 89.443, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628037.898004, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712628040.9793696}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 53, "power": 81.262}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1985.25, 81920.0], "load": 0, "temperature": 52, "power": 81.456}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00616455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.98, "temperature": 57, "power": 302.989}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94366455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92230224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02435302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96478271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.97, "temperature": 57, "power": 304.52}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.12783779749235, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05963134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.96, "temperature": 59, "power": 300.296}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.19372049532787, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03607177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.61919732581593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.79516807047067, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 59, "power": 300.906}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.239990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.99607131897149, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.17510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5086669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.60573889271562, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4044189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9764404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.44017918330458, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.96, "temperature": 59, "power": 293.935}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06768798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.72793339906218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.77393573315881, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04644775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14105224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.72086981416982, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 59, "power": 167.846}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.45257568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 62.696178116903155, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.43798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08331298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.55820838588714, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04290771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 60, "power": 301.912}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.68067499149402, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.25390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.66793194310351, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16436767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.26544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.34357509610528, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 60, "power": 293.553}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1256103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.68162837452489, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.60365853305886, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.53780957896747, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 60, "power": 304.263}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 62.59412861048949, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22601318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.59579659187892, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05743408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 60, "power": 311.15}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06365966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.92281135845143, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.32281494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.16508941411085, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.38861083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 60, "power": 294.894}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.54673659467007, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0684814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.16552953137692, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.20361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.9509342225694, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15069580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 61, "power": 225.696}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.054931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 81.11187329586767, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 87.6713843800704, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.26671330503297, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.98, "temperature": 62, "power": 344.002}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.17598694138134, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1290283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7943115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.46370679145551, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00726318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 60, "power": 309.36}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 50.79952894422302, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90716552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.34175667819486, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01605224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 77.22232484814468, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.34379717524203, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 61, "power": 303.728}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.69039600654017, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84466552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.55956048950688, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89312744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9566650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.37246372514865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 62, "power": 313.366}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.81021203155055, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1663818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2611083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.2614860989797, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05230712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 86.69354311895651, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 61, "power": 298.524}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1234130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.28186025457218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.11883581542773, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 64, "power": 302.017}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.16621434963577, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.99277830891157, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96356201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.12308428572129, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.94, "temperature": 64, "power": 300.055}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 81.06576944153926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.51133386097231, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.72427128547459, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 65, "power": 303.485}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02862548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.62794812956025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8790283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13043212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 78.78522649740411, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 66, "power": 308.917}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0330810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 88.25564525700754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97674560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.75408097420453, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.25547334174688, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 67, "power": 302.515}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04339599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.77750628223352, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.39404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.39511414791714, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 80.96446843245378, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.95, "temperature": 67, "power": 331.102}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09002685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.45568388205645, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0643310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.64122287953006, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 67, "power": 280.904}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.43184065801552, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99871826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.56673883729063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98858642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.464291533269, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.98, "temperature": 69, "power": 296.762}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 69, "power": 308.501}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712628134.6530924, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/regnet_y_128gf.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/regnet_y_128gf.D1.data new file mode 100644 index 000000000..22a5136a7 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/regnet_y_128gf.D1.data @@ -0,0 +1,402 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "regnet_y_128gf", "--batch-size": 64}, "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "weight": 2.0, "name": "regnet_y_128gf", "tag": ["regnet_y_128gf", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 56, "power": 83.56, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 57, "power": 88.159, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628040.961638, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712628040.9877372}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 54, "power": 85.886}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1773.25, 81920.0], "load": 0, "temperature": 53, "power": 85.678}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00616455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94366455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 59, "power": 304.168}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92230224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02435302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96478271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.96, "temperature": 60, "power": 303.599}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.36139594915922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05963134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.51816351305098, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03607177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.96, "temperature": 60, "power": 294.568}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.64471211000132, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.5885712813222, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.239990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.96, "temperature": 61, "power": 314.357}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.33819049821143, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.17510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5086669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.25139774396884, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4044189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9764404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.71703364282683, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06768798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 61, "power": 300.717}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.36696916569785, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 81.86344569294421, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04644775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14105224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 86.7664401644563, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.96, "temperature": 62, "power": 331.838}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.45257568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.52567779061854, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.43798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08331298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.34496180121982, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04290771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 62, "power": 300.246}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.64331575907845, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.25390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.92254890479069, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16436767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.26544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.18433686662023, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 62, "power": 302.391}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1256103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.35241106469817, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.30503034361038, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 62, "power": 200.311}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 83.5300867123742, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 89.69940951411306, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22601318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 68.3914682934193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05743408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.94, "temperature": 62, "power": 228.415}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06365966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.2157032853799, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.32281494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.12605768607841, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.38861083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.26252362086443, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0684814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 62, "power": 333.055}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.24300898514927, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.20361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 81.6184937620825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15069580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 63, "power": 296.187}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.054931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 86.43047504193865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.86174746048859, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.4008379972556, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 63, "power": 302.845}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.46844394556378, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1290283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7943115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.46032125242378, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 63, "power": 282.355}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00726318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 65.51449457469916, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 83.59654238065238, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90716552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01605224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.33675900602383, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.95, "temperature": 63, "power": 314.29}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.60506356968561, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.26722407965617, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84466552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.36001897694419, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 64, "power": 226.748}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89312744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9566650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.98786156578274, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.94254827088434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1663818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 64, "power": 305.947}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.15317180782283, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2611083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05230712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.0113787680063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1234130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.19814675856051, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 65, "power": 296.331}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.8117783972108, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 87.73084111599165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.32236326644343, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.99, "temperature": 65, "power": 310.898}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96356201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.9949849740341, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.66274641406565, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 67, "power": 299.514}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.74127282515477, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.33703316642256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02862548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.14504770230779, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.95, "temperature": 67, "power": 320.939}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8790283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.15612479178216, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13043212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.17882146066195, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0330810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97674560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.58163605932023, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.95, "temperature": 68, "power": 232.207}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.45382040710268, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04339599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.37742934586593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.39404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.95, "temperature": 68, "power": 239.431}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.31608578365234, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.95037152366315, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09002685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.40782737218817, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 68, "power": 294.047}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0643310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.08519886718797, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 93.24138434256626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99871826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 0.95, "temperature": 70, "power": 313.597}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 73.81712537516488, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.6839315191548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31567.25, 81920.0], "load": 1.0, "temperature": 70, "power": 312.071}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "regnet_y_128gf", "--batch-size", "64"], "time": 1712628133.138333, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0.data new file mode 100644 index 000000000..472f843de --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0.data @@ -0,0 +1,367 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "njobs", "n": 1}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "weight": 5.0, "name": "resnet152-multi", "tag": ["resnet152-multi", "0"], "job-number": 0, "devices": ["0", "1"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 58, "power": 85.483, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.27, "memory": 0.010771942138671876}, "temperature": 62, "power": 97.72, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628760.267209, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712628760.2853827}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 1, total 2, device cuda:1.\n", "pipe": "stderr"} +{"event": "line", "data": "Training in distributed mode with multiple processes, 1 device per process.Process 0, total 2, device cuda:0.\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.2) calculated from base learning rate (0.1) and global batch size (512) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch DistributedDataParallel.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2457.25, 81920.0], "load": 0, "temperature": 55, "power": 82.231}, "1": {"memory": [2457.25, 81920.0], "load": 0, "temperature": 57, "power": 89.136}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956526756286621}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/8 ( 0%)] Loss: 6.939 (6.94) Time: 5.014s, 102.12/s (5.014s, 102.12/s) LR: 1.000e-05 Data: 1.392 (1.392)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [18611.25, 81920.0], "load": 1.0, "temperature": 57, "power": 102.744}, "1": {"memory": [17983.25, 81920.0], "load": 1.0, "temperature": 60, "power": 279.964}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.939521789550781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9394097328186035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28121.25, 81920.0], "load": 0.88, "temperature": 59, "power": 303.644}, "1": {"memory": [28103.25, 81920.0], "load": 1.0, "temperature": 59, "power": 259.791}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925026893615723}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 7/8 (100%)] Loss: 6.952 (6.95) Time: 0.818s, 625.89/s (1.143s, 448.12/s) LR: 1.000e-05 Data: 0.000 (0.186)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.679 (1.679) Loss: 6.9102 (6.9102) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.9766 ( 0.9766)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.756 (0.369) Loss: 6.8866 (6.9091) Acc@1: 0.0000 ( 0.0484) Acc@5: 28.1250 ( 0.5814)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0/20240409-021246-resnet152-224/checkpoint-0.pth.tar', 0.04844961240310078)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28161.25, 81920.0], "load": 0, "temperature": 55, "power": 82.524}, "1": {"memory": [28141.25, 81920.0], "load": 0, "temperature": 58, "power": 89.211}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4873.25, 81920.0], "load": 0, "temperature": 55, "power": 82.621}, "1": {"memory": [5293.25, 81920.0], "load": 0, "temperature": 57, "power": 89.332}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.942824363708496}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/8 ( 0%)] Loss: 6.953 (6.95) Time: 1.518s, 337.37/s (1.518s, 337.37/s) LR: 4.001e-02 Data: 1.148 (1.148)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1324.3711276726824, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.910190582275391}, "pipe": "data"} +{"event": "data", "data": {"rate": 1264.2530050374135, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906571388244629}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 7/8 (100%)] Loss: 6.893 (6.92) Time: 0.371s, 1379.63/s (0.526s, 973.22/s) LR: 4.001e-02 Data: 0.000 (0.153)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1277.7116822626517, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.061 (1.061) Loss: 6.8418 (6.8418) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.019 (0.269) Loss: 6.7092 (6.8606) Acc@1: 28.1250 ( 0.2180) Acc@5: 28.1250 ( 1.0417)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0/20240409-021246-resnet152-224/checkpoint-1.pth.tar', 0.2180232558139535)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28113.25, 81920.0], "load": 1.0, "temperature": 59, "power": 240.134}, "1": {"memory": [28127.25, 81920.0], "load": 0.98, "temperature": 63, "power": 325.608}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1371.1011096366803, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28597.25, 81920.0], "load": 0.03, "temperature": 56, "power": 83.937}, "1": {"memory": [28611.25, 81920.0], "load": 0, "temperature": 59, "power": 90.725}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.847428321838379}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/8 ( 0%)] Loss: 6.848 (6.85) Time: 1.951s, 262.41/s (1.951s, 262.41/s) LR: 8.001e-02 Data: 1.572 (1.572)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1335.067318070696, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.846500396728516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29081.25, 81920.0], "load": 1.0, "temperature": 60, "power": 312.54}, "1": {"memory": [29095.25, 81920.0], "load": 1.0, "temperature": 63, "power": 298.928}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1260.9662224116012, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9479570388793945}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 7/8 (100%)] Loss: 6.925 (6.89) Time: 0.374s, 1370.76/s (0.581s, 880.64/s) LR: 8.001e-02 Data: 0.000 (0.207)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.057 (1.057) Loss: 6.7841 (6.7841) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.1953 ( 0.1953)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.021 (0.268) Loss: 6.5027 (6.8055) Acc@1: 0.0000 ( 0.2907) Acc@5: 3.1250 ( 1.2112)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0/20240409-021246-resnet152-224/checkpoint-2.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1366.938332785932, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29565.25, 81920.0], "load": 0, "temperature": 57, "power": 84.014}, "1": {"memory": [29579.25, 81920.0], "load": 0.7, "temperature": 63, "power": 304.389}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8527140617370605}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/8 ( 0%)] Loss: 6.831 (6.83) Time: 1.476s, 346.80/s (1.476s, 346.80/s) LR: 1.200e-01 Data: 1.105 (1.105)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29901.25, 81920.0], "load": 1.0, "temperature": 57, "power": 88.353}, "1": {"memory": [29917.25, 81920.0], "load": 0.78, "temperature": 59, "power": 91.279}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 541.502507854642, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.913934230804443}, "pipe": "data"} +{"event": "data", "data": {"rate": 1345.886828101472, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015081882476807}, "pipe": "data"} +{"event": "data", "data": {"rate": 724.0798611268001, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 7/8 (100%)] Loss: 7.031 (6.93) Time: 0.375s, 1365.48/s (0.593s, 863.44/s) LR: 1.200e-01 Data: 0.000 (0.148)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.054 (1.054) Loss: 6.7746 (6.7746) Acc@1: 0.1953 ( 0.1953) Acc@5: 0.1953 ( 0.1953)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.019 (0.269) Loss: 6.5383 (6.8118) Acc@1: 3.1250 ( 0.2180) Acc@5: 3.1250 ( 0.7994)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1362.1891138906337, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30049.25, 81920.0], "load": 0.92, "temperature": 61, "power": 307.247}, "1": {"memory": [30073.25, 81920.0], "load": 0.97, "temperature": 64, "power": 96.729}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30533.25, 81920.0], "load": 0.03, "temperature": 57, "power": 84.601}, "1": {"memory": [30557.25, 81920.0], "load": 0, "temperature": 59, "power": 91.887}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8400115966796875}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/8 ( 0%)] Loss: 6.855 (6.85) Time: 1.499s, 341.52/s (1.499s, 341.52/s) LR: 1.600e-01 Data: 1.121 (1.121)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1087.0824441223647, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.920403957366943}, "pipe": "data"} +{"event": "data", "data": {"rate": 1353.1418750518148, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31023.25, 81920.0], "load": 0.89, "temperature": 61, "power": 84.405}, "1": {"memory": [31041.25, 81920.0], "load": 1.0, "temperature": 61, "power": 98.43}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.997241973876953}, "pipe": "data"} +{"event": "data", "data": {"rate": 798.5143260422002, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.045167922973633}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 7/8 (100%)] Loss: 7.052 (6.95) Time: 0.376s, 1360.11/s (0.624s, 821.14/s) LR: 1.600e-01 Data: 0.000 (0.191)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.053 (1.053) Loss: 6.7719 (6.7719) Acc@1: 0.0000 ( 0.0000) Acc@5: 2.5391 ( 2.5391)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.020 (0.268) Loss: 6.4059 (6.8284) Acc@1: 0.0000 ( 0.2180) Acc@5: 3.1250 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1360.2020767921301, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31507.25, 81920.0], "load": 0.94, "temperature": 61, "power": 316.777}, "1": {"memory": [31525.25, 81920.0], "load": 0.53, "temperature": 61, "power": 96.813}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.843812942504883}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/8 ( 0%)] Loss: 6.857 (6.86) Time: 1.481s, 345.64/s (1.481s, 345.64/s) LR: 1.999e-01 Data: 1.108 (1.108)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31991.25, 81920.0], "load": 0, "temperature": 57, "power": 84.601}, "1": {"memory": [32009.25, 81920.0], "load": 1.0, "temperature": 60, "power": 96.228}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1122.9960207294012, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981951713562012}, "pipe": "data"} +{"event": "data", "data": {"rate": 1256.2398079418053, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.042436599731445}, "pipe": "data"} +{"event": "data", "data": {"rate": 1320.3049781236618, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 7/8 (100%)] Loss: 7.072 (6.96) Time: 0.375s, 1363.94/s (0.560s, 914.67/s) LR: 1.999e-01 Data: 0.000 (0.185)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.054 (1.054) Loss: 6.8240 (6.8240) Acc@1: 0.1953 ( 0.1953) Acc@5: 3.3203 ( 3.3203)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.021 (0.268) Loss: 6.4305 (6.8421) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1363.4658541454078, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31991.25, 81920.0], "load": 0.64, "temperature": 58, "power": 85.57}, "1": {"memory": [32009.25, 81920.0], "load": 0, "temperature": 60, "power": 93.036}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32475.25, 81920.0], "load": 0, "temperature": 57, "power": 84.555}, "1": {"memory": [32829.25, 81920.0], "load": 0.9, "temperature": 63, "power": 297.337}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.844778060913086}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/8 ( 0%)] Loss: 6.872 (6.87) Time: 1.478s, 346.31/s (1.478s, 346.31/s) LR: 1.998e-01 Data: 1.103 (1.103)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.931532859802246}, "pipe": "data"} +{"event": "data", "data": {"rate": 1236.97101589834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 1332.9528811583223, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1027679443359375}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 7/8 (100%)] Loss: 7.106 (6.99) Time: 0.375s, 1364.93/s (0.535s, 956.15/s) LR: 1.998e-01 Data: 0.000 (0.148)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.055 (1.055) Loss: 6.7671 (6.7671) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.7578 ( 1.7578)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.019 (0.269) Loss: 6.1864 (6.8530) Acc@1: 0.0000 ( 0.1938) Acc@5: 25.0000 ( 1.0901)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1356.5709174066246, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32959.25, 81920.0], "load": 0.92, "temperature": 62, "power": 299.859}, "1": {"memory": [32977.25, 81920.0], "load": 0.97, "temperature": 64, "power": 301.472}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33443.25, 81920.0], "load": 0.03, "temperature": 58, "power": 84.775}, "1": {"memory": [33461.25, 81920.0], "load": 0, "temperature": 60, "power": 92.66}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.882028102874756}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/8 ( 0%)] Loss: 6.885 (6.89) Time: 1.472s, 347.92/s (1.472s, 347.92/s) LR: 1.997e-01 Data: 1.098 (1.098)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1026.6276699472726, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951415061950684}, "pipe": "data"} +{"event": "data", "data": {"rate": 1259.456731518551, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.986845016479492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33927.25, 81920.0], "load": 0.9, "temperature": 62, "power": 299.391}, "1": {"memory": [33945.25, 81920.0], "load": 0.93, "temperature": 65, "power": 298.859}}}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 7/8 (100%)] Loss: 7.091 (6.99) Time: 0.375s, 1364.91/s (0.563s, 909.96/s) LR: 1.997e-01 Data: 0.000 (0.186)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1265.8699471868756, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.056 (1.056) Loss: 6.7634 (6.7634) Acc@1: 0.3906 ( 0.3906) Acc@5: 2.7344 ( 2.7344)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.021 (0.268) Loss: 6.2407 (6.8638) Acc@1: 0.0000 ( 0.2422) Acc@5: 21.8750 ( 1.2839)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1360.9622492197793, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34411.25, 81920.0], "load": 0, "temperature": 58, "power": 310.484}, "1": {"memory": [34429.25, 81920.0], "load": 0.88, "temperature": 64, "power": 311.397}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870962142944336}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/8 ( 0%)] Loss: 6.880 (6.88) Time: 1.483s, 345.32/s (1.483s, 345.32/s) LR: 1.996e-01 Data: 1.108 (1.108)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1111.004509649762, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.061315536499023}, "pipe": "data"} +{"event": "data", "data": {"rate": 1310.9110968591062, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34895.25, 81920.0], "load": 1.0, "temperature": 61, "power": 307.221}, "1": {"memory": [34913.25, 81920.0], "load": 1.0, "temperature": 63, "power": 297.389}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0355224609375}, "pipe": "data"} +{"event": "data", "data": {"rate": 1202.9990347706698, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 7/8 (100%)] Loss: 7.069 (6.97) Time: 0.376s, 1361.50/s (0.526s, 973.87/s) LR: 1.996e-01 Data: 0.001 (0.149)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.053 (1.053) Loss: 6.8561 (6.8561) Acc@1: 0.0000 ( 0.0000) Acc@5: 2.9297 ( 2.9297)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.021 (0.271) Loss: 6.5238 (6.8705) Acc@1: 0.0000 ( 0.1696) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1360.3817993355303, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35379.25, 81920.0], "load": 0.88, "temperature": 62, "power": 324.109}, "1": {"memory": [35397.25, 81920.0], "load": 0.48, "temperature": 61, "power": 92.744}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934852600097656}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/8 ( 0%)] Loss: 6.907 (6.91) Time: 1.481s, 345.75/s (1.481s, 345.75/s) LR: 1.996e-01 Data: 1.105 (1.105)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35863.25, 81920.0], "load": 0.2, "temperature": 60, "power": 84.405}, "1": {"memory": [35881.25, 81920.0], "load": 0.62, "temperature": 60, "power": 97.268}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.866626262664795}, "pipe": "data"} +{"event": "data", "data": {"rate": 1181.8057378150445, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.024938583374023}, "pipe": "data"} +{"event": "data", "data": {"rate": 1256.1874243544748, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 7/8 (100%)] Loss: 7.067 (6.99) Time: 0.375s, 1364.84/s (0.566s, 904.29/s) LR: 1.996e-01 Data: 0.000 (0.190)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.053 (1.053) Loss: 6.8282 (6.8282) Acc@1: 1.1719 ( 1.1719) Acc@5: 1.5625 ( 1.5625)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.021 (0.269) Loss: 6.4799 (6.8560) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.8236)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1359.9479166155268, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35863.25, 81920.0], "load": 0.79, "temperature": 58, "power": 84.894}, "1": {"memory": [35881.25, 81920.0], "load": 0, "temperature": 61, "power": 92.856}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36347.25, 81920.0], "load": 0, "temperature": 57, "power": 84.402}, "1": {"memory": [36701.25, 81920.0], "load": 1.0, "temperature": 63, "power": 285.328}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9068522453308105}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/8 ( 0%)] Loss: 6.903 (6.90) Time: 1.480s, 345.99/s (1.480s, 345.99/s) LR: 1.995e-01 Data: 1.106 (1.106)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1207.3475645057154, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9865546226501465}, "pipe": "data"} +{"event": "data", "data": {"rate": 1359.982592435314, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0600690841674805}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 7/8 (100%)] Loss: 7.088 (7.00) Time: 0.376s, 1363.49/s (0.525s, 975.25/s) LR: 1.995e-01 Data: 0.000 (0.149)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1142.9553426207428, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.052 (1.052) Loss: 6.7794 (6.7794) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.1953 ( 0.1953)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.018 (0.268) Loss: 6.1963 (6.8511) Acc@1: 0.0000 ( 0.2422) Acc@5: 21.8750 ( 1.0417)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36831.25, 81920.0], "load": 0.98, "temperature": 61, "power": 264.402}, "1": {"memory": [36849.25, 81920.0], "load": 0.96, "temperature": 63, "power": 254.542}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1362.468479943679, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37315.25, 81920.0], "load": 0.02, "temperature": 58, "power": 89.148}, "1": {"memory": [37333.25, 81920.0], "load": 0, "temperature": 60, "power": 92.159}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888532638549805}, "pipe": "data"} +{"event": "line", "data": "Train: 11 [ 0/8 ( 0%)] Loss: 6.856 (6.86) Time: 1.482s, 345.46/s (1.482s, 345.46/s) LR: 1.993e-01 Data: 1.109 (1.109)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1367.9210496457904, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944759368896484}, "pipe": "data"} +{"event": "data", "data": {"rate": 1087.8723736008021, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.971771717071533}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37799.25, 81920.0], "load": 0.91, "temperature": 61, "power": 302.805}, "1": {"memory": [37817.25, 81920.0], "load": 0.91, "temperature": 64, "power": 309.564}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1246.5478678739312, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 11 [ 7/8 (100%)] Loss: 7.037 (6.95) Time: 0.376s, 1361.67/s (0.583s, 878.40/s) LR: 1.993e-01 Data: 0.000 (0.194)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.052 (1.052) Loss: 6.8024 (6.8024) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.024 (0.268) Loss: 6.1080 (6.8375) Acc@1: 9.3750 ( 0.2665) Acc@5: 28.1250 ( 1.0659)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1361.6387335988004, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [38283.25, 81920.0], "load": 0.94, "temperature": 61, "power": 321.792}, "1": {"memory": [38301.25, 81920.0], "load": 0.94, "temperature": 63, "power": 310.62}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.893085479736328}, "pipe": "data"} +{"event": "line", "data": "Train: 12 [ 0/8 ( 0%)] Loss: 6.888 (6.89) Time: 1.482s, 345.59/s (1.482s, 345.59/s) LR: 1.992e-01 Data: 1.106 (1.106)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1274.095660742109, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953129768371582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [38767.25, 81920.0], "load": 0.95, "temperature": 61, "power": 251.999}, "1": {"memory": [38785.25, 81920.0], "load": 1.0, "temperature": 62, "power": 300.2}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1254.482586435745, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029263973236084}, "pipe": "data"} +{"event": "line", "data": "Train: 12 [ 7/8 (100%)] Loss: 6.998 (6.94) Time: 0.375s, 1365.55/s (0.527s, 971.70/s) LR: 1.992e-01 Data: 0.000 (0.149)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.057 (1.057) Loss: 6.7230 (6.7230) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.7578 ( 1.7578)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.019 (0.269) Loss: 6.1960 (6.8388) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1628)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1357.4474073241026, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [39251.25, 81920.0], "load": 0.62, "temperature": 61, "power": 316.004}, "1": {"memory": [39269.25, 81920.0], "load": 0.14, "temperature": 60, "power": 91.057}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8946404457092285}, "pipe": "data"} +{"event": "line", "data": "Train: 13 [ 0/8 ( 0%)] Loss: 6.889 (6.89) Time: 1.478s, 346.50/s (1.478s, 346.50/s) LR: 1.991e-01 Data: 1.104 (1.104)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [39735.25, 81920.0], "load": 0, "temperature": 56, "power": 83.819}, "1": {"memory": [39753.25, 81920.0], "load": 1.0, "temperature": 59, "power": 102.636}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1118.7765929727286, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.861790657043457}, "pipe": "data"} +{"event": "data", "data": {"rate": 1258.3631719628308, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946868896484375}, "pipe": "data"} +{"event": "line", "data": "Train: 13 [ 7/8 (100%)] Loss: 6.990 (6.94) Time: 0.375s, 1365.42/s (0.562s, 911.65/s) LR: 1.991e-01 Data: 0.000 (0.186)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1251.9276476486607, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.053 (1.053) Loss: 6.7353 (6.7353) Acc@1: 0.9766 ( 0.9766) Acc@5: 1.7578 ( 1.7578)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.025 (0.269) Loss: 6.5137 (6.8287) Acc@1: 0.0000 ( 0.1938) Acc@5: 3.1250 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1364.0907267079583, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [39735.25, 81920.0], "load": 0.24, "temperature": 57, "power": 84.58}, "1": {"memory": [39753.25, 81920.0], "load": 0, "temperature": 59, "power": 95.837}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [40219.25, 81920.0], "load": 0, "temperature": 56, "power": 87.799}, "1": {"memory": [40573.25, 81920.0], "load": 1.0, "temperature": 61, "power": 303.089}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.829690456390381}, "pipe": "data"} +{"event": "line", "data": "Train: 14 [ 0/8 ( 0%)] Loss: 6.837 (6.84) Time: 1.484s, 345.03/s (1.484s, 345.03/s) LR: 1.989e-01 Data: 1.110 (1.110)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.860248565673828}, "pipe": "data"} +{"event": "data", "data": {"rate": 1263.2709772279131, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946683406829834}, "pipe": "data"} +{"event": "data", "data": {"rate": 1324.7900498000135, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 14 [ 7/8 (100%)] Loss: 6.996 (6.92) Time: 0.374s, 1368.71/s (0.526s, 974.04/s) LR: 1.989e-01 Data: 0.000 (0.149)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.056 (1.056) Loss: 6.7577 (6.7577) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.5625 ( 1.5625)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.019 (0.268) Loss: 6.4719 (6.8167) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [40703.25, 81920.0], "load": 0.99, "temperature": 60, "power": 280.282}, "1": {"memory": [40721.25, 81920.0], "load": 1.0, "temperature": 63, "power": 256.576}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 1358.8914792860926, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [41187.25, 81920.0], "load": 0.03, "temperature": 56, "power": 87.951}, "1": {"memory": [41205.25, 81920.0], "load": 0, "temperature": 58, "power": 90.583}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.845968723297119}, "pipe": "data"} +{"event": "line", "data": "Train: 15 [ 0/8 ( 0%)] Loss: 6.850 (6.85) Time: 1.485s, 344.68/s (1.485s, 344.68/s) LR: 1.988e-01 Data: 1.109 (1.109)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1347.282533563857, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.865680694580078}, "pipe": "data"} +{"event": "data", "data": {"rate": 1199.7013324787463, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931249618530273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [41675.25, 81920.0], "load": 0.88, "temperature": 60, "power": 298.398}, "1": {"memory": [41689.25, 81920.0], "load": 1.0, "temperature": 63, "power": 291.776}}}, "pipe": "data"} +{"event": "line", "data": "Train: 15 [ 7/8 (100%)] Loss: 6.939 (6.89) Time: 0.374s, 1367.63/s (0.569s, 899.82/s) LR: 1.988e-01 Data: 0.000 (0.192)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1268.607650632959, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.054 (1.054) Loss: 6.7293 (6.7293) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.7578 ( 1.7578)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.022 (0.276) Loss: 6.5311 (6.8095) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1360.5644677975768, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [42159.25, 81920.0], "load": 0.93, "temperature": 58, "power": 281.707}, "1": {"memory": [42173.25, 81920.0], "load": 0.92, "temperature": 59, "power": 101.626}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8549933433532715}, "pipe": "data"} +{"event": "line", "data": "Train: 16 [ 0/8 ( 0%)] Loss: 6.843 (6.84) Time: 1.480s, 345.89/s (1.480s, 345.89/s) LR: 1.986e-01 Data: 1.107 (1.107)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1099.3726266917658, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.847872734069824}, "pipe": "data"} +{"event": "data", "data": {"rate": 1359.0854916881399, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [42643.25, 81920.0], "load": 1.0, "temperature": 58, "power": 299.281}, "1": {"memory": [42657.25, 81920.0], "load": 1.0, "temperature": 61, "power": 163.624}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90403413772583}, "pipe": "data"} +{"event": "data", "data": {"rate": 1164.8638203551384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 16 [ 7/8 (100%)] Loss: 6.905 (6.87) Time: 0.375s, 1366.64/s (0.524s, 976.96/s) LR: 1.986e-01 Data: 0.000 (0.149)\n", "pipe": "stderr"} +{"event": "line", "data": "Distributing BatchNorm running means and vars\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/8] Time: 1.032 (1.032) Loss: 6.7216 (6.7216) Acc@1: 0.0000 ( 0.0000) Acc@5: 2.1484 ( 2.1484)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 8/8] Time: 0.019 (0.265) Loss: 6.5287 (6.8020) Acc@1: 0.0000 ( 0.2180) Acc@5: 3.1250 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 1363.0618688777292, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [43087.25, 81920.0], "load": 0.87, "temperature": 59, "power": 297.544}, "1": {"memory": [43101.25, 81920.0], "load": 0.38, "temperature": 58, "power": 300.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.802212715148926}, "pipe": "data"} +{"event": "line", "data": "Train: 17 [ 0/8 ( 0%)] Loss: 6.834 (6.83) Time: 1.483s, 345.29/s (1.483s, 345.29/s) LR: 1.984e-01 Data: 1.108 (1.108)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [43087.25, 81920.0], "load": 0, "temperature": 57, "power": 305.914}, "1": {"memory": [43101.25, 81920.0], "load": 1.0, "temperature": 60, "power": 318.886}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8169050216674805}, "pipe": "data"} +{"event": "data", "data": {"rate": 1264.8116703204448, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918081283569336}, "pipe": "data"} +{"event": "data", "data": {"rate": 1331.2683235304028, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "line", "data": "[2024-04-09 02:15:07,813] torch.distributed.elastic.agent.server.api: [WARNING] Received Signals.SIGTERM death signal, shutting down workers\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-09 02:15:07,813] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 67268 closing signal SIGTERM\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-09 02:15:07,813] torch.distributed.elastic.multiprocessing.api: [WARNING] Sending process 67269 closing signal SIGTERM\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/torchrun\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py\", line 346, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " return f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 806, in main\n", "pipe": "stderr"} +{"event": "line", "data": " run(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"} +{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 255, in launch_agent\n", "pipe": "stderr"} +{"event": "line", "data": " result = agent.run()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/metrics/api.py\", line 124, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " result = f(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/agent/server/api.py\", line 736, in run\n", "pipe": "stderr"} +{"event": "line", "data": " result = self._invoke_run(role)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/agent/server/api.py\", line 877, in _invoke_run\n", "pipe": "stderr"} +{"event": "line", "data": " time.sleep(monitor_interval)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/elastic/multiprocessing/api.py\", line 62, in _terminate_process_handler\n", "pipe": "stderr"} +{"event": "line", "data": " raise SignalException(f\"Process {os.getpid()} got signal: {sigval}\", sigval=sigval)\n", "pipe": "stderr"} +{"event": "line", "data": "torch.distributed.elastic.multiprocessing.api.SignalException: Process 67258 got signal: 15\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["torchrun", "--nproc_per_node=2", "--", "-m", "voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1712628908.6323633, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0.data new file mode 100644 index 000000000..afcdc8487 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0.data @@ -0,0 +1,296 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.18, "memory": 0.010771942138671876}, "temperature": 68, "power": 98.764, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 108.446, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628613.765104, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712628616.817858}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1833.25, 81920.0], "load": 0, "temperature": 62, "power": 88.841}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 5.633s, 45.44/s (5.633s, 45.44/s) LR: 1.000e-05 Data: 2.012 (2.012)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [12567.25, 81920.0], "load": 1.0, "temperature": 62, "power": 92.2}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.960927486419678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.942133903503418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27171.25, 81920.0], "load": 0.99, "temperature": 67, "power": 145.894}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946683406829834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957451820373535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93405818939209}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.365s, 701.65/s (0.738s, 346.80/s) LR: 1.000e-05 Data: 0.000 (0.140)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 613.8429713875979, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.210 (1.210) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.260 (0.269) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0/20240409-021022-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27171.25, 81920.0], "load": 0.99, "temperature": 65, "power": 260.073}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 700.9813330669896, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27655.25, 81920.0], "load": 0.56, "temperature": 67, "power": 272.159}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4955.25, 81920.0], "load": 0, "temperature": 63, "power": 89.037}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963784694671631}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.953s, 131.08/s (1.953s, 131.08/s) LR: 2.001e-02 Data: 1.105 (1.105)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.937549114227295}, "pipe": "data"} +{"event": "data", "data": {"rate": 446.0898152331649, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884865760803223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27201.25, 81920.0], "load": 0.99, "temperature": 67, "power": 258.193}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.3297450278327, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.887411117553711}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.5433716165501, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.929993152618408}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.1703713781228, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.914115905761719}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.979 (6.92) Time: 0.368s, 695.54/s (0.477s, 536.48/s) LR: 2.001e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 626.0022699820438, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.558 (1.558) Loss: 6.8469 (6.8469) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.246) Loss: 6.5618 (6.8343) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.1870)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0/20240409-021022-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27201.25, 81920.0], "load": 0.93, "temperature": 67, "power": 262.327}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.1646330430922, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27685.25, 81920.0], "load": 0.94, "temperature": 67, "power": 183.846}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27723.25, 81920.0], "load": 0, "temperature": 63, "power": 88.903}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8321685791015625}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.832 (6.83) Time: 1.678s, 152.58/s (1.678s, 152.58/s) LR: 4.001e-02 Data: 1.311 (1.311)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 690.2385374061092, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8606181144714355}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.1986188415743, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90275764465332}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.8766940767396, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943968772888184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28169.25, 81920.0], "load": 0.92, "temperature": 68, "power": 289.236}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.0749458412282, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979184150695801}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.9232763580577, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945030689239502}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.945 (6.92) Time: 0.367s, 696.77/s (0.461s, 555.66/s) LR: 4.001e-02 Data: 0.000 (0.093)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.420 (1.420) Loss: 6.8006 (6.8006) Acc@1: 0.7812 ( 0.7812) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.246) Loss: 6.3738 (6.8022) Acc@1: 0.0000 ( 0.1938) Acc@5: 3.1250 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.5005311174775, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28169.25, 81920.0], "load": 0, "temperature": 64, "power": 89.772}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28653.25, 81920.0], "load": 0.73, "temperature": 67, "power": 283.916}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.866513252258301}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.867 (6.87) Time: 1.589s, 161.11/s (1.589s, 161.11/s) LR: 6.000e-02 Data: 1.220 (1.220)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.927303314208984}, "pipe": "data"} +{"event": "data", "data": {"rate": 648.9429694976271, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29137.25, 81920.0], "load": 0.93, "temperature": 67, "power": 310.502}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 694.1843299504254, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964653968811035}, "pipe": "data"} +{"event": "data", "data": {"rate": 585.3359872466996, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976548194885254}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.6670812708813, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.063714027404785}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.1712704066151, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.027237415313721}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29137.25, 81920.0], "load": 0.93, "temperature": 68, "power": 326.379}}}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.027 (6.96) Time: 0.369s, 694.32/s (0.456s, 560.94/s) LR: 6.000e-02 Data: 0.001 (0.088)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.381 (1.381) Loss: 6.8759 (6.8759) Acc@1: 0.0000 ( 0.0000) Acc@5: 3.5156 ( 3.5156)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.245) Loss: 6.2936 (6.8193) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.0901)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0/20240409-021022-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.472422843934, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29621.25, 81920.0], "load": 0.01, "temperature": 64, "power": 327.272}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29621.25, 81920.0], "load": 0, "temperature": 62, "power": 93.641}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878335952758789}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.878 (6.88) Time: 1.598s, 160.19/s (1.598s, 160.19/s) LR: 8.000e-02 Data: 1.229 (1.229)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928692817687988}, "pipe": "data"} +{"event": "data", "data": {"rate": 613.9714027725695, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981250762939453}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.5711649526363, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.062885284423828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30105.25, 81920.0], "load": 0.99, "temperature": 65, "power": 297.055}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.3054667886173, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.069405555725098}, "pipe": "data"} +{"event": "data", "data": {"rate": 613.6232594784981, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047870635986328}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.048 (7.01) Time: 0.368s, 695.28/s (0.457s, 560.37/s) LR: 8.000e-02 Data: 0.000 (0.089)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 623.1805759268864, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.196 (1.196) Loss: 6.8326 (6.8326) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.245) Loss: 6.5630 (6.8449) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 1.2355)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.8288357288977, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30105.25, 81920.0], "load": 0, "temperature": 63, "power": 89.196}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30589.25, 81920.0], "load": 0.93, "temperature": 65, "power": 107.738}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877357482910156}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.877 (6.88) Time: 1.544s, 165.83/s (1.544s, 165.83/s) LR: 9.993e-02 Data: 1.176 (1.176)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 584.26715819389, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9659743309021}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31073.25, 81920.0], "load": 0.92, "temperature": 66, "power": 310.413}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 613.4345977322391, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.082920074462891}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.4878348911193, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1171112060546875}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.4622531481357, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112399101257324}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.8728406832614, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08535099029541}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31073.25, 81920.0], "load": 0.92, "temperature": 66, "power": 282.48}}}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.085 (7.04) Time: 0.368s, 695.29/s (0.453s, 564.53/s) LR: 9.993e-02 Data: 0.001 (0.085)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.389 (1.389) Loss: 6.9317 (6.9317) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.245) Loss: 6.3712 (6.8751) Acc@1: 0.0000 ( 0.2422) Acc@5: 3.1250 ( 0.9448)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.1946368171073, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31557.25, 81920.0], "load": 0.07, "temperature": 65, "power": 88.1}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31557.25, 81920.0], "load": 0, "temperature": 61, "power": 91.261}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9003705978393555}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.900 (6.90) Time: 1.510s, 169.51/s (1.510s, 169.51/s) LR: 9.990e-02 Data: 1.141 (1.141)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 642.0732899673354, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005989074707031}, "pipe": "data"} +{"event": "data", "data": {"rate": 480.965939226538, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9920854568481445}, "pipe": "data"} +{"event": "data", "data": {"rate": 607.0555644779638, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015820026397705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32041.25, 81920.0], "load": 0.93, "temperature": 65, "power": 301.084}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 613.7148418769224, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.093565940856934}, "pipe": "data"} +{"event": "data", "data": {"rate": 612.7624145066777, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15126895904541}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.151 (7.04) Time: 0.368s, 694.91/s (0.463s, 553.51/s) LR: 9.990e-02 Data: 0.000 (0.083)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 622.9281631334007, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.057 (1.057) Loss: 6.8126 (6.8126) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.021 (0.245) Loss: 6.4990 (6.8584) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.1628)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32041.25, 81920.0], "load": 0, "temperature": 61, "power": 87.685}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 694.3238267744503, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32525.25, 81920.0], "load": 0.96, "temperature": 62, "power": 291.52}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870067596435547}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.870 (6.87) Time: 1.481s, 172.84/s (1.481s, 172.84/s) LR: 9.987e-02 Data: 1.113 (1.113)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 661.6129935343961, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.952033042907715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33009.25, 81920.0], "load": 0.94, "temperature": 63, "power": 305.036}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.2512084142576, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0446271896362305}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.0164357217203, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.093796253204346}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.542434650548, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.147687911987305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33009.25, 81920.0], "load": 0.94, "temperature": 63, "power": 271.935}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.1711566026128, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.093276500701904}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.093 (7.03) Time: 0.368s, 696.41/s (0.449s, 570.26/s) LR: 9.987e-02 Data: 0.001 (0.081)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.322 (1.322) Loss: 6.8546 (6.8546) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.244) Loss: 6.5573 (6.8523) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 1.0417)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.0714094003323, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33493.25, 81920.0], "load": 0.65, "temperature": 60, "power": 308.671}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33493.25, 81920.0], "load": 0.03, "temperature": 59, "power": 86.124}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9194512367248535}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.919 (6.92) Time: 1.481s, 172.81/s (1.481s, 172.81/s) LR: 9.982e-02 Data: 1.114 (1.114)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 618.5558546018066, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976428508758545}, "pipe": "data"} +{"event": "data", "data": {"rate": 647.555181172352, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33977.25, 81920.0], "load": 0.93, "temperature": 63, "power": 312.542}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.012627601623535}, "pipe": "data"} +{"event": "data", "data": {"rate": 697.311799365585, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.050484657287598}, "pipe": "data"} +{"event": "data", "data": {"rate": 592.0237536950666, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.026584148406982}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.295486127524, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.117252349853516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33977.25, 81920.0], "load": 0.99, "temperature": 64, "power": 301.227}}}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.117 (7.01) Time: 0.368s, 696.51/s (0.448s, 570.84/s) LR: 9.982e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 623.874295295296, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.398 (1.398) Loss: 6.8428 (6.8428) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.245) Loss: 6.6180 (6.8503) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.2445702000542, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34461.25, 81920.0], "load": 0, "temperature": 61, "power": 145.56}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34461.25, 81920.0], "load": 0, "temperature": 59, "power": 86.167}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.920931816101074}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.921 (6.92) Time: 1.475s, 173.54/s (1.475s, 173.54/s) LR: 9.978e-02 Data: 1.108 (1.108)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 599.0484833232583, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885295867919922}, "pipe": "data"} +{"event": "data", "data": {"rate": 637.3916430783277, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99557638168335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34945.25, 81920.0], "load": 0.92, "temperature": 64, "power": 303.191}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 685.127654947275, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.971601486206055}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.332621336269, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.091066360473633}, "pipe": "data"} +{"event": "data", "data": {"rate": 597.176397665203, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032480716705322}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 7.032 (6.98) Time: 0.367s, 697.47/s (0.448s, 571.53/s) LR: 9.978e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34945.25, 81920.0], "load": 0.98, "temperature": 64, "power": 300.238}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.420 (1.420) Loss: 6.8012 (6.8012) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.244) Loss: 6.4277 (6.8260) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.2112)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.8491885002016, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35429.25, 81920.0], "load": 0.79, "temperature": 63, "power": 308.13}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35429.25, 81920.0], "load": 0, "temperature": 59, "power": 85.644}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.835842132568359}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.836 (6.84) Time: 1.507s, 169.85/s (1.507s, 169.85/s) LR: 9.973e-02 Data: 1.140 (1.140)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 587.1301699741384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.920172691345215}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.298566037898, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912924766540527}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.4361431001327, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D0", "--checkpoint-hist", "1"], "time": 1712628755.47188, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1.data new file mode 100644 index 000000000..2499b3d0b --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1.data @@ -0,0 +1,304 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "timm", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "plan": {"method": "per_gpu"}, "argv": {"--amp": true, "--model": "resnet152", "--batch-size": 256}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet152", "tag": ["resnet152", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 95.729, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 102.748, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628616.797756, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712628616.825474}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2459.25, 81920.0], "load": 0, "temperature": 64, "power": 98.583}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 5.135s, 49.85/s (5.135s, 49.85/s) LR: 1.000e-05 Data: 1.464 (1.464)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [12899.25, 81920.0], "load": 1.0, "temperature": 65, "power": 101.368}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.960927486419678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.942133903503418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27185.25, 81920.0], "load": 0.99, "temperature": 69, "power": 296.075}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946683883666992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957444190979004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934067249298096}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.364s, 704.08/s (0.703s, 364.24/s) LR: 1.000e-05 Data: 0.000 (0.106)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 622.693151431237, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.200 (1.200) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.256 (0.271) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1/20240409-021022-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27185.25, 81920.0], "load": 0.99, "temperature": 67, "power": 272.567}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 703.5000573325067, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27669.25, 81920.0], "load": 0.55, "temperature": 69, "power": 224.084}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4955.25, 81920.0], "load": 0, "temperature": 65, "power": 97.217}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96378755569458}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.936s, 132.21/s (1.936s, 132.21/s) LR: 2.001e-02 Data: 1.104 (1.104)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.906617641448975}, "pipe": "data"} +{"event": "data", "data": {"rate": 449.32836528511723, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27213.25, 81920.0], "load": 0.99, "temperature": 70, "power": 303.488}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.927833557128906}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.3550270637307, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902897834777832}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.0982405005429, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895452499389648}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.2235969882179, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9780731201171875}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.978 (6.92) Time: 0.369s, 694.66/s (0.477s, 536.54/s) LR: 2.001e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27215.25, 81920.0], "load": 0.94, "temperature": 70, "power": 309.343}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.3174508921413, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.542 (1.542) Loss: 6.8471 (6.8471) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.250) Loss: 6.5643 (6.8342) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.2355)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1/20240409-021022-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.5218075873073, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27699.25, 81920.0], "load": 0.94, "temperature": 70, "power": 261.53}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [27737.25, 81920.0], "load": 0, "temperature": 65, "power": 97.481}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832368850708008}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.832 (6.83) Time: 1.589s, 161.12/s (1.589s, 161.12/s) LR: 4.001e-02 Data: 1.222 (1.222)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 620.2249072156865, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85904598236084}, "pipe": "data"} +{"event": "data", "data": {"rate": 694.4590674445743, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898777008056641}, "pipe": "data"} +{"event": "data", "data": {"rate": 580.6681612664416, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.948024749755859}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28183.25, 81920.0], "load": 0.92, "temperature": 70, "power": 258.982}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.3398878860089, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.986285209655762}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.9807193735803, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936272144317627}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.936 (6.92) Time: 0.369s, 694.07/s (0.456s, 560.80/s) LR: 4.001e-02 Data: 0.000 (0.088)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.407 (1.407) Loss: 6.7988 (6.7988) Acc@1: 2.7344 ( 2.7344) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.250) Loss: 6.3715 (6.8029) Acc@1: 0.0000 ( 0.1938) Acc@5: 25.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 693.4401613537017, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28221.25, 81920.0], "load": 0, "temperature": 66, "power": 98.81}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [28667.25, 81920.0], "load": 0.99, "temperature": 68, "power": 255.755}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8605194091796875}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.861 (6.86) Time: 1.506s, 169.94/s (1.506s, 169.94/s) LR: 6.000e-02 Data: 1.139 (1.139)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.922441005706787}, "pipe": "data"} +{"event": "data", "data": {"rate": 604.4343261858411, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [29151.25, 81920.0], "load": 0.91, "temperature": 70, "power": 290.311}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963921546936035}, "pipe": "data"} +{"event": "data", "data": {"rate": 608.4291325204235, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9868316650390625}, "pipe": "data"} +{"event": "data", "data": {"rate": 609.8773961088269, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.055777549743652}, "pipe": "data"} +{"event": "data", "data": {"rate": 612.1825787096851, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022915840148926}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [29151.25, 81920.0], "load": 0.92, "temperature": 70, "power": 290.428}}}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.023 (6.96) Time: 0.370s, 692.12/s (0.452s, 565.85/s) LR: 6.000e-02 Data: 0.000 (0.083)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 618.5155206302248, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.359 (1.359) Loss: 6.8991 (6.8991) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.249) Loss: 6.3486 (6.8199) Acc@1: 0.0000 ( 0.2180) Acc@5: 0.0000 ( 1.1143)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 691.8598326031622, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [29635.25, 81920.0], "load": 0.24, "temperature": 66, "power": 320.501}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [29635.25, 81920.0], "load": 0, "temperature": 64, "power": 97.481}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8735032081604}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.874 (6.87) Time: 1.450s, 176.54/s (1.450s, 176.54/s) LR: 8.000e-02 Data: 1.081 (1.081)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 605.689572444062, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92647647857666}, "pipe": "data"} +{"event": "data", "data": {"rate": 687.9651818671567, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978068828582764}, "pipe": "data"} +{"event": "data", "data": {"rate": 691.7742235893394, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05295467376709}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [30119.25, 81920.0], "load": 0.91, "temperature": 70, "power": 312.72}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 581.6887362440696, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.070687294006348}, "pipe": "data"} +{"event": "data", "data": {"rate": 611.1797608255122, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.041446208953857}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.041 (7.00) Time: 0.369s, 693.07/s (0.449s, 570.45/s) LR: 8.000e-02 Data: 0.000 (0.079)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.342 (1.342) Loss: 6.7782 (6.7782) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.249) Loss: 6.4534 (6.8382) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1628)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1/20240409-021022-resnet152-224/checkpoint-4.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 692.1931992263328, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [30157.25, 81920.0], "load": 0, "temperature": 65, "power": 101.998}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [30603.25, 81920.0], "load": 0.62, "temperature": 65, "power": 102.066}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87905216217041}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.879 (6.88) Time: 1.465s, 174.75/s (1.465s, 174.75/s) LR: 9.993e-02 Data: 1.097 (1.097)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.2075211446188, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98189115524292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31087.25, 81920.0], "load": 0.91, "temperature": 68, "power": 295.154}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 583.377750085869, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077712059020996}, "pipe": "data"} +{"event": "data", "data": {"rate": 610.3802684729476, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.115594863891602}, "pipe": "data"} +{"event": "data", "data": {"rate": 613.6067444182632, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.099220275878906}, "pipe": "data"} +{"event": "data", "data": {"rate": 613.9234465904024, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.092921257019043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31087.25, 81920.0], "load": 0.93, "temperature": 69, "power": 331.218}}}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.093 (7.04) Time: 0.370s, 691.49/s (0.449s, 569.72/s) LR: 9.993e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.377 (1.377) Loss: 6.8929 (6.8929) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.248) Loss: 6.3971 (6.8630) Acc@1: 0.0000 ( 0.2180) Acc@5: 3.1250 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 692.367031574203, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31571.25, 81920.0], "load": 0.74, "temperature": 64, "power": 308.379}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [31571.25, 81920.0], "load": 0, "temperature": 62, "power": 94.088}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91213321685791}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.912 (6.91) Time: 1.484s, 172.47/s (1.484s, 172.47/s) LR: 9.990e-02 Data: 1.116 (1.116)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 523.5067698924132, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013132095336914}, "pipe": "data"} +{"event": "data", "data": {"rate": 611.2803999204593, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993353366851807}, "pipe": "data"} +{"event": "data", "data": {"rate": 611.9835247915291, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.026797771453857}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32055.25, 81920.0], "load": 0.91, "temperature": 67, "power": 316.983}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.2977911677526, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.100984573364258}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.9259051850607, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15781307220459}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.158 (7.05) Time: 0.368s, 695.65/s (0.450s, 568.76/s) LR: 9.990e-02 Data: 0.000 (0.082)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.267 (1.267) Loss: 6.7929 (6.7929) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.249) Loss: 6.4795 (6.8732) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.0659)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.891429561349, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32055.25, 81920.0], "load": 0, "temperature": 62, "power": 98.227}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [32539.25, 81920.0], "load": 0.99, "temperature": 62, "power": 92.824}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8790178298950195}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.879 (6.88) Time: 1.528s, 167.50/s (1.528s, 167.50/s) LR: 9.987e-02 Data: 1.161 (1.161)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 695.3135700603106, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949779033660889}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33023.25, 81920.0], "load": 0.99, "temperature": 65, "power": 302.756}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 586.7104683923039, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0393571853637695}, "pipe": "data"} +{"event": "data", "data": {"rate": 612.4711148294772, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.072181701660156}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.7530449870613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.134942531585693}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.6033638331561, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.105975151062012}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.106 (7.03) Time: 0.369s, 694.50/s (0.452s, 566.08/s) LR: 9.987e-02 Data: 0.000 (0.084)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33023.25, 81920.0], "load": 0.99, "temperature": 65, "power": 199.486}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.449 (1.449) Loss: 6.8728 (6.8728) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.248) Loss: 6.4905 (6.8484) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1/20240409-021022-resnet152-224/checkpoint-7.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 695.1518629167043, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33507.25, 81920.0], "load": 0, "temperature": 61, "power": 329.14}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33507.25, 81920.0], "load": 0, "temperature": 60, "power": 91.789}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.892126083374023}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.892 (6.89) Time: 1.478s, 173.16/s (1.478s, 173.16/s) LR: 9.982e-02 Data: 1.112 (1.112)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 612.2378218878015, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962090969085693}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.1659601060494, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.991216659545898}, "pipe": "data"} +{"event": "data", "data": {"rate": 585.7199573105788, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029236793518066}, "pipe": "data"} +{"event": "data", "data": {"rate": 611.1659916862183, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33991.25, 81920.0], "load": 0.99, "temperature": 66, "power": 300.125}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994783401489258}, "pipe": "data"} +{"event": "data", "data": {"rate": 616.6243599060779, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076773643493652}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.077 (6.99) Time: 0.368s, 694.93/s (0.450s, 569.38/s) LR: 9.982e-02 Data: 0.000 (0.082)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 624.539359529227, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.376 (1.376) Loss: 6.8128 (6.8128) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.248) Loss: 6.6031 (6.8347) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 0.9448)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 694.8594525699632, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34029.25, 81920.0], "load": 0, "temperature": 62, "power": 93.317}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34475.25, 81920.0], "load": 0.93, "temperature": 64, "power": 302.132}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909823894500732}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.910 (6.91) Time: 1.490s, 171.77/s (1.490s, 171.77/s) LR: 9.978e-02 Data: 1.124 (1.124)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 584.5996311308776, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875603675842285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34959.25, 81920.0], "load": 0.91, "temperature": 65, "power": 307.862}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.7732117011047, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.959753036499023}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.0100112957215, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955413818359375}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.7516108705771, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0825042724609375}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.1354897695534, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.019111156463623}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34959.25, 81920.0], "load": 1.0, "temperature": 66, "power": 309.524}}}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 7.019 (6.96) Time: 0.368s, 696.13/s (0.449s, 569.88/s) LR: 9.978e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.427 (1.427) Loss: 6.8140 (6.8140) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.248) Loss: 6.4316 (6.8139) Acc@1: 0.0000 ( 0.3149) Acc@5: 0.0000 ( 1.1870)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1/20240409-021022-resnet152-224/checkpoint-9.pth.tar', 0.31492248062015504)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.1081114848791, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35443.25, 81920.0], "load": 0.63, "temperature": 62, "power": 94.135}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35443.25, 81920.0], "load": 0, "temperature": 60, "power": 92.269}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.845844268798828}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.846 (6.85) Time: 1.468s, 174.40/s (1.468s, 174.40/s) LR: 9.973e-02 Data: 1.101 (1.101)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.902804374694824}, "pipe": "data"} +{"event": "data", "data": {"rate": 612.9584599855425, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9039082527160645}, "pipe": "data"} +{"event": "data", "data": {"rate": 614.2928108188546, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9279279708862305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35927.25, 81920.0], "load": 0.93, "temperature": 65, "power": 257.355}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 612.8297781266544, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979306221008301}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.3979436476217, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "resnet152", "--batch-size", "256", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/zobisevu.2024-04-09_01:45:18.095020/resnet152.D1", "--checkpoint-hist", "1"], "time": 1712628756.960661, "return_code": -15}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet50.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet50.D0.data new file mode 100644 index 000000000..e6cff7d38 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet50.D0.data @@ -0,0 +1,2108 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 95.239, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 109.532, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627631.101081, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712627634.2040732}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0189208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07318115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0589599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06268310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1090087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.8, "temperature": 66, "power": 178.257}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.185791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99810791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03021240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0460205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06439208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01129150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03814697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12310791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9730224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06427001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12481689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0045166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0550537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06060791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04132080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97540283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0970458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 66, "power": 302.428}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90789794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88079833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90875244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84259033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95147705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.760498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0108642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94647216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90313720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1073.1107376771224, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92645263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92181396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94342041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1079.7736566650876, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.001708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9281005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9161376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05157470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95526123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.929931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 67, "power": 271.843}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.49133639457, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14642333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92083740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9735107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01495361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0101318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 771.3035992412446, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8753662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86322021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7818603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01275634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81646728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88751220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99383544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92449951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1074.0683835884072, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97406005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02996826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86309814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.847403097735, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9158935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9371337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 67, "power": 285.618}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96405029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00738525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96612548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1079.2928877337604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95794677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9923095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1101.9086087272924, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85980224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79498291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84417724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83636474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 815.4942951601603, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87982177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78558349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8668212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84478759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03179931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91180419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0032958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 68, "power": 300.835}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9293212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1066.9576793737158, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93719482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91351318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9639892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0362548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0242919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1066.5435446707438, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9608154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9676513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92315673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85491943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8707275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05633544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1091.0905718726292, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75518798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8743896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75701904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76666259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8651123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90704345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87725830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 777.0841922263794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8424072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84649658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78570556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8453369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 67, "power": 248.404}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88116455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7698974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0291748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1083.804165970881, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9876708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91876220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0474853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9068603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87408447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1087.8293152291233, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93695068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90765380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03533935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91595458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00433349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1090.92520466193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10699462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.15, "temperature": 65, "power": 91.603}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 549.7781923319151, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83807373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78363037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9781494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87750244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01373291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 909.4486883934359, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9207763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.6268227192945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01202392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9251708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91827392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90899658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9385986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1085.5809197036363, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85308837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95916748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02166748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 68, "power": 182.674}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1096.7435986032217, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.766845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7198486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85162353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88653564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 834.973614614784, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93023681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98834228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07659912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91363525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.7254987401554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96173095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88519287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05010986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88616943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92608642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1084.1441063742209, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07244873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.73, "temperature": 67, "power": 311.343}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8892822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0374755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01385498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1097.8821857681944, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88031005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7603759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 382.0517243281953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7813720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98284912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8736572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85198974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04522705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1093.3174752724976, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9031982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96685791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87628173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.81, "temperature": 68, "power": 174.712}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8714599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9698486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.055653675139, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96661376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95611572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95672607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97576904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10626220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1085.7392888531, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02752685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02606201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.869873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7694091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1100.544867726802, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95062255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0047607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9041748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 869.1778052814325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84368896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97747802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87054443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.81, "temperature": 67, "power": 157.991}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93328857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87518310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95941162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90655517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91143798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0428466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1071.3912869133135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96258544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0184326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92169189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9564208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0125732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79156494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.6078403254153, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89605712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95782470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92376708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09295654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [9, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1108.0827777044026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.6580810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0, "temperature": 65, "power": 125.353}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8775634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86627197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77252197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 397.14247919264284, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.939697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94830322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7374267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86505126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9329833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.937255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03314208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.2912237558346, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.873779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0120849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97467041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88360595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1067.581422762112, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97418212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98223876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01593017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1086.5082969044063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97027587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.952880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9888916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 68, "power": 220.275}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [10, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7283935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 726.9268546492318, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8455810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73211669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75860595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91790771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87445068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86419677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8148193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90692138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98175048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1062.4432101278258, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92071533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85479736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02642822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.3885974518796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92889404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85137939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88055419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0123291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9727783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1060.2571750448576, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 68, "power": 241.354}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87127685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [11, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1105.4732153059406, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7322998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84112548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95172119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 460.4447976423759, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9088134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83197021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85174560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9654541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9080810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88189697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1064.1620444458977, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87200927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82464599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 67, "power": 262.959}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02545166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94940185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8238525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90972900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9095458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1057.5699795246192, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05096435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9444580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99859619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.0498586410993, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90521240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91668701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02362060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [12, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82745361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 731.0319124685348, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83087158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82574462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.767822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94927978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8480224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87884521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88458251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88140869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1077.510484994065, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.75, "temperature": 67, "power": 194.507}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94097900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9276123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91339111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8748779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84796142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06488037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1053.3145070662272, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87677001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0169677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00152587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.486785719481, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96636962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89569091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06988525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [13, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1104.9428978286303, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0, "temperature": 64, "power": 91.591}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8389892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 586.7240609921863, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89910888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01068115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86041259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.851318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98089599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82415771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0321044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8673095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1081.1612238756582, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80596923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88262939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9637451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95635986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90606689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1054.8548666651757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9393310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95416259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89556884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93426513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1080.4424991977614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99908447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 66, "power": 291.513}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [14, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83612060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84588623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 753.63267927773, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00128173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97076416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77191162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8599853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1073.2868726649574, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9468994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01458740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83660888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8387451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9302978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1060.810602138962, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97637939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86175537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95196533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0.72, "temperature": 66, "power": 247.9}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9525146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79437255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1078.9016835996001, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87786865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [15, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77789306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 750.0099591166186, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4743.25, 81920.0], "load": 0, "temperature": 63, "power": 90.468}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712627745.850268, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet50.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet50.D1.data new file mode 100644 index 000000000..0c56eaf42 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/resnet50.D1.data @@ -0,0 +1,2108 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "torchvision", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "plan": {"method": "per_gpu"}, "argv": {"--precision": "tf32-fp16", "--lr": 0.01, "--no-stdout": true, "--epochs": 50, "--model": "resnet50", "--batch-size": 64}, "tags": ["classification", "convnet", "resnet", "vision"], "weight": 1.0, "name": "resnet50", "tag": ["resnet50", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 92.959, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 102.748, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627634.184692, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712627634.2123935}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0189208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07318115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0589599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06268310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1090087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.81, "temperature": 69, "power": 184.846}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.185791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99810791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03021240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0460205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06439208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01129150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03814697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12310791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9730224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06427001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12481689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0045166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0550537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06060791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04132080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97540283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0970458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 67, "power": 305.808}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90789794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88079833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90875244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84259033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95147705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.760498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0108642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94647216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90313720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.5107699251337, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92645263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92181396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94342041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1078.4680436427284, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.001708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9281005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9161376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05157470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95526123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.929931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.75, "temperature": 70, "power": 321.086}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1074.9086350630441, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14642333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92083740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9735107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01495361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0101318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 728.0190157050196, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8753662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86322021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7818603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01275634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81646728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88751220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99383544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92449951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.015943440355, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97406005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02996826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86309814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1072.1638451619012, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9158935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9371337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 70, "power": 275.053}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96405029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00738525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96612548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1067.612150659867, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95794677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9923095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1104.6764506958518, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85980224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79498291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84417724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83636474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87982177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 832.0443092716239, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78558349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8668212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84478759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03179931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91180419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0032958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9293212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.8, "temperature": 69, "power": 285.837}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1055.0334581558157, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93719482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91351318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9639892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0362548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0242919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1051.9586317693484, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9608154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9676513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92315673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85491943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8707275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05633544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1094.1128931860696, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75518798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8743896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75701904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76666259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8651123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90704345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87725830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 760.7279207511516, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8424072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84649658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78570556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8453369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 69, "power": 232.848}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88116455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7698974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0291748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1051.2032586808268, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9876708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91876220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0474853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9068603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87408447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1062.8481305078724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93695068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90765380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03533935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91595458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00433349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1066.9085044909616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10699462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0, "temperature": 67, "power": 100.456}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83807373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 533.3025380745513, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78363037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9781494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87750244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01373291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1073.4425263539315, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9207763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1051.8451364422526, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01202392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9251708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91827392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90899658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9385986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85308837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.9963167152214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95916748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02166748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.81, "temperature": 70, "power": 190.397}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1094.7395716079134, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.766845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7198486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85162353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88653564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93023681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 866.2964021092885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98834228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07659912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91363525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96173095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.5391278538523, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88519287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05010986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88616943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92608642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1077.2806585172593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07244873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 71, "power": 212.482}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8892822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0374755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01385498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1099.72017517044, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88031005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7603759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 434.7070990094119, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7813720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98284912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8736572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85198974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04522705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9031982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.2516872021638, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96685791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87628173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.73, "temperature": 70, "power": 233.364}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8714599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9698486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96661376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95611572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1053.6259608741925, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95672607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97576904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10626220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02752685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1074.5214288460552, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02606201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.869873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7694091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 800.2667319027433, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95062255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0047607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9041748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84368896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1067.264333581739, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97747802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87054443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93328857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.8, "temperature": 70, "power": 242.936}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87518310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95941162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90655517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91143798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0428466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1056.8758157365573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96258544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0184326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92169189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9564208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0125732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79156494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89605712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1056.7694817250854, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95782470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92376708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09295654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [9, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1103.6395687744096, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.6580810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.04, "temperature": 68, "power": 101.58}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8775634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86627197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77252197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.939697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94830322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 510.07878575971745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7374267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86505126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9329833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.937255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03314208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.873779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1074.6156332729634, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0120849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97467041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88360595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1076.2562012178407, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97418212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98223876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01593017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97027587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1087.0027640981666, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.952880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9888916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [10, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.76, "temperature": 71, "power": 305.408}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7283935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8455810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73211669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75860595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 686.9367763927554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91790771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87445068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86419677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8148193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90692138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98175048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92071533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85479736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1072.4710101524402, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02642822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92889404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1077.8310721970681, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85137939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88055419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0123291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9727783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.76, "temperature": 71, "power": 313.609}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1072.5208461061357, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87127685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [11, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7322998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 781.4491693768596, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84112548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95172119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 588.9590461130945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9088134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83197021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85174560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9654541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9080810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88189697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1065.2640846208974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87200927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82464599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.79, "temperature": 71, "power": 257.305}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02545166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94940185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8238525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90972900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9095458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1054.7775532695819, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05096435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9444580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99859619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90521240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91668701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1071.3171409419163, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02362060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [12, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82745361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83087158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82574462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 775.0377582652874, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.767822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94927978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8480224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87884521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88458251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88140869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.74, "temperature": 70, "power": 318.535}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1052.8264414759676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94097900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9276123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91339111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8748779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84796142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06488037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1063.5481537096332, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87677001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0169677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00152587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96636962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1064.3641485739422, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89569091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06988525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [13, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8389892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 771.2303476699581, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.23, "temperature": 66, "power": 324.621}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89910888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 770.730235557168, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01068115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86041259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.851318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98089599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82415771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0321044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8673095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80596923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1082.0673084494067, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88262939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9637451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95635986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90606689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9393310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1065.9223552926815, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95416259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89556884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93426513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99908447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.8, "temperature": 70, "power": 311.339}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [14, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1089.072818406592, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83612060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84588623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00128173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97076416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 756.8132350949863, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77191162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8599853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9468994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1073.0929140922124, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01458740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83660888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8387451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9302978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97637939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1060.1145952819456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86175537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95196533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9525146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.77, "temperature": 70, "power": 257.145}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79437255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87786865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1080.9938862065835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [15, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77789306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 650.4284119569786, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [4743.25, 81920.0], "load": 0.06, "temperature": 66, "power": 283.028}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--precision", "tf32-fp16", "--lr", "0.01", "--no-stdout", "--epochs", "50", "--model", "resnet50", "--batch-size", "64"], "time": 1712627745.7782917, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/rwkv.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/rwkv.D0.data new file mode 100644 index 000000000..f72ef053f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/rwkv.D0.data @@ -0,0 +1,423 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 38, "power": 75.781, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.1, "memory": 0.010771942138671876}, "temperature": 39, "power": 75.43, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629726.834737, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712629729.9207287}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "[2024-04-09 02:28:52,184] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"} +{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-09-02-28-53', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"} +{"event": "line", "data": "Building token list...\n", "pipe": "stderr"} +{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"} +{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "ninja: no work to do.\n", "pipe": "stdout"} +{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"} +{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"} +{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"} +{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"} +{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"} +{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1571.25, 81920.0], "load": 0.14, "temperature": 37, "power": 72.322}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 37, "power": 72.224}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1415.25, 81920.0], "load": 0, "temperature": 37, "power": 71.759}}}, "pipe": "data"} +{"event": "error", "data": {"type": "Exception", "message": ">- DeepSpeed Op Builder: Installed CUDA version 12.2 does not match the version torch was compiled with 11.8, unable to compile cuda/cpp extensions without a matching cuda version."}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"} +{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"} +{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"} +{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"} +{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"} +{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"} +{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"} +{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 467, in jit_load\n", "pipe": "stderr"} +{"event": "line", "data": " assert_no_cuda_mismatch(self.name)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 96, in assert_no_cuda_mismatch\n", "pipe": "stderr"} +{"event": "line", "data": " raise Exception(f\">- DeepSpeed Op Builder: Installed CUDA version {sys_cuda_version} does not match the \"\n", "pipe": "stderr"} +{"event": "line", "data": "Exception: >- DeepSpeed Op Builder: Installed CUDA version 12.2 does not match the version torch was compiled with 11.8, unable to compile cuda/cpp extensions without a matching cuda version.\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712629740.9566448, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/rwkv.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/rwkv.D1.data new file mode 100644 index 000000000..ba1af09af --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/rwkv.D1.data @@ -0,0 +1,423 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "rwkv", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "tags": ["llm", "rnn", "unsupported-rocm"], "plan": {"method": "per_gpu"}, "argv": {"--data_type": "dummy", "--ctx_len": 128, "--epoch_steps": 1000, "--epoch_count": 20, "--epoch_begin": 0, "--epoch_save": 0, "--micro_bsz": 16, "--n_layer": 12, "--n_embd": 768, "--pre_ffn": 0, "--head_qk": 0, "--lr_init": "6e-4", "--lr_final": "1e-5", "--warmup_steps": 0, "--beta1": 0.9, "--beta2": 0.99, "--adam_eps": "1e-8", "--accelerator": "gpu", "--devices": 1, "--precision": "tf32", "--strategy": "ddp_find_unused_parameters_false", "--grad_cp": 0, "--random_seed": 1234, "--enable_progress_bar": "False"}, "weight": 1.0, "name": "rwkv", "tag": ["rwkv", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 38, "power": 72.028, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 39, "power": 74.581, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629729.902659, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712629729.960097}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "[2024-04-09 02:28:52,249] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"} +{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-09-02-28-53', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"} +{"event": "line", "data": "Building token list...\n", "pipe": "stderr"} +{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"} +{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "ninja: no work to do.\n", "pipe": "stdout"} +{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"} +{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"} +{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"} +{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"} +{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"} +{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1571.25, 81920.0], "load": 0.12, "temperature": 39, "power": 75.12}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [885.6875, 81920.0], "load": 0, "temperature": 39, "power": 75.234}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1415.25, 81920.0], "load": 0, "temperature": 39, "power": 74.278}}}, "pipe": "data"} +{"event": "error", "data": {"type": "Exception", "message": ">- DeepSpeed Op Builder: Installed CUDA version 12.2 does not match the version torch was compiled with 11.8, unable to compile cuda/cpp extensions without a matching cuda version."}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"} +{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"} +{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"} +{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"} +{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"} +{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"} +{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 467, in jit_load\n", "pipe": "stderr"} +{"event": "line", "data": " assert_no_cuda_mismatch(self.name)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 96, in assert_no_cuda_mismatch\n", "pipe": "stderr"} +{"event": "line", "data": " raise Exception(f\">- DeepSpeed Op Builder: Installed CUDA version {sys_cuda_version} does not match the \"\n", "pipe": "stderr"} +{"event": "line", "data": "Exception: >- DeepSpeed Op Builder: Installed CUDA version 12.2 does not match the version torch was compiled with 11.8, unable to compile cuda/cpp extensions without a matching cuda version.\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--data_type", "dummy", "--ctx_len", "128", "--epoch_steps", "1000", "--epoch_count", "20", "--epoch_begin", "0", "--epoch_save", "0", "--micro_bsz", "16", "--n_layer", "12", "--n_embd", "768", "--pre_ffn", "0", "--head_qk", "0", "--lr_init", "6e-4", "--lr_final", "1e-5", "--warmup_steps", "0", "--beta1", "0.9", "--beta2", "0.99", "--adam_eps", "1e-8", "--accelerator", "gpu", "--devices", "1", "--precision", "tf32", "--strategy", "ddp_find_unused_parameters_false", "--grad_cp", "0", "--random_seed", "1234", "--enable_progress_bar", "False"], "time": 1712629741.0518954, "return_code": 1}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/stargan.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/stargan.D0.data new file mode 100644 index 000000000..2be4363bd --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/stargan.D0.data @@ -0,0 +1,678 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 1.0, "memory": 0.010771942138671876}, "temperature": 62, "power": 92.005, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.84, "memory": 0.010771942138671876}, "temperature": 65, "power": 99.698, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629286.864596, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712629289.9173884}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.088788986206055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [17747.25, 81920.0], "load": 0.89, "temperature": 63, "power": 337.032}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35637.25, 81920.0], "load": 0, "temperature": 58, "power": 85.297}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [18787.25, 81920.0], "load": 0.99, "temperature": 60, "power": 159.346}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.347299575805664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.765205383300781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.148660659790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 5.321765856146238, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.001835823059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 71.05201793725955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35827.25, 81920.0], "load": 0, "temperature": 58, "power": 84.797}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26905.25, 81920.0], "load": 1.0, "temperature": 62, "power": 308.802}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0241708755493164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.705942153930664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.942171096801758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 6.9168630886224625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.02168607711792}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3637683391571045}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:17], Iteration [10/200000], D/loss_real: -1.3520, D/loss_fake: 0.1343, D/loss_cls: 3.2982, D/loss_gp: 0.0283, G/loss_fake: -0.3874, G/loss_rec: 0.5371, G/loss_cls: 3.3738\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 72.66281470035138, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36415.25, 81920.0], "load": 0.01, "temperature": 61, "power": 330.995}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5255987644195557}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.0013346672058105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8241424560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 29.907896594960363, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.37147057056427}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.711289882659912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 50.56945033291015, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1745004653930664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.857970952987671}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.572556018829346}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5107901096343994}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.390759229660034}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.2796459272961, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:21], Iteration [20/200000], D/loss_real: -2.9224, D/loss_fake: 1.1207, D/loss_cls: 3.5014, D/loss_gp: 0.0691, G/loss_fake: -1.1511, G/loss_rec: 0.5308, G/loss_cls: 3.3935\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36415.25, 81920.0], "load": 0.91, "temperature": 62, "power": 234.487}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6984118223190308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0302462577819824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.98761943031688, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9795348048210144}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3558409214019775}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8812305927276611}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.76575951777042, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.635997295379639}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.509818077087402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.67836856842041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8538520336151123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.128528594970703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.38333837776703, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:24], Iteration [30/200000], D/loss_real: -0.3412, D/loss_fake: -0.5662, D/loss_cls: 3.4076, D/loss_gp: 0.2628, G/loss_fake: 0.6678, G/loss_rec: 0.5205, G/loss_cls: 3.3943\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36415.25, 81920.0], "load": 0.99, "temperature": 63, "power": 273.676}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9309120178222656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.781552791595459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.115180700915204, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5070433616638184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4026412963867188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.77961540222168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.65742389827503, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.547173500061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7656924724578857}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.781158924102783}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.116848111152649}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36415.25, 81920.0], "load": 0.92, "temperature": 62, "power": 330.187}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.694348692893982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.4597579840453, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:28], Iteration [40/200000], D/loss_real: -4.9564, D/loss_fake: 1.6986, D/loss_cls: 3.7842, D/loss_gp: 0.1168, G/loss_fake: -0.7483, G/loss_rec: 0.5075, G/loss_cls: 3.4074\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7827353477478027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9754653573036194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.49856449114887, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4446818828582764}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1061625480651855}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.151052474975586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.1109598263704, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1655614376068115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9414399266242981}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7994539737701416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6189892292022705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5357154607772827}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36415.25, 81920.0], "load": 1.0, "temperature": 64, "power": 160.443}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.17837615048612, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:31], Iteration [50/200000], D/loss_real: -5.5011, D/loss_fake: 2.3659, D/loss_cls: 3.3387, D/loss_gp: 0.0332, G/loss_fake: -2.2765, G/loss_rec: 0.5052, G/loss_cls: 3.3449\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8329731225967407}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8318867683410645}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.410831274287933, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7613564729690552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.680351734161377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.649629533290863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.62464521428767, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1090788841247559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.075874924659729}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9775517582893372}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8785903453826904}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8372830152511597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.43936230471645, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:35], Iteration [60/200000], D/loss_real: -5.7329, D/loss_fake: 2.9917, D/loss_cls: 3.2994, D/loss_gp: 0.0279, G/loss_fake: -2.6891, G/loss_rec: 0.5080, G/loss_cls: 3.3993\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36415.25, 81920.0], "load": 0.91, "temperature": 64, "power": 277.863}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0740165710449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.070267915725708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.07587872695642, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9656950235366821}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9697949886322021}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0307152271270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.74921142616056, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8622654676437378}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3842244148254395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3067299127578735}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6310020685195923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.892975091934204}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.37326028994648, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:38], Iteration [70/200000], D/loss_real: -4.4789, D/loss_fake: 2.7518, D/loss_cls: 3.4936, D/loss_gp: 0.0126, G/loss_fake: -3.3570, G/loss_rec: 0.5104, G/loss_cls: 3.3366\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 63, "power": 273.232}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7173616886138916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6042852401733398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.7845670408284, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5841679573059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5252680778503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3489655256271362}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.60266521330369, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7919559478759766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9162724018096924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.596582293510437}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5536799430847168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.569115400314331}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.398752786282486, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:42], Iteration [80/200000], D/loss_real: -5.5468, D/loss_fake: 3.7807, D/loss_cls: 3.3119, D/loss_gp: 0.0023, G/loss_fake: -3.9544, G/loss_rec: 0.5203, G/loss_cls: 3.3647\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 64, "power": 149.63}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2237725257873535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2440035343170166}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.941573180895123, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.060624599456787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0193865299224854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9745678901672363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.20191296817934, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.609546184539795}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6157350540161133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2712361812591553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.94, "temperature": 64, "power": 327.185}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1702215671539307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0922915935516357}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.37566285534014, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:45], Iteration [90/200000], D/loss_real: -4.9153, D/loss_fake: 3.7429, D/loss_cls: 3.2592, D/loss_gp: 0.0006, G/loss_fake: -4.2415, G/loss_rec: 0.5404, G/loss_cls: 3.3323\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.917240619659424}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2632479667663574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.023028855727905, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4661128520965576}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.429866075515747}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4586143493652344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.91418612655371, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5463671684265137}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.921365261077881}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9309892654418945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.219142436981201}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5054759979248047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 64, "power": 322.633}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.07246245137948, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:49], Iteration [100/200000], D/loss_real: -3.7431, D/loss_fake: 3.3646, D/loss_cls: 3.2836, D/loss_gp: 0.0600, G/loss_fake: -4.4910, G/loss_rec: 0.5619, G/loss_cls: 3.3330\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.512633800506592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.334502696990967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.854758182446574, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.142587661743164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9021825790405273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.780797243118286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.93901051035916, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.091706275939941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.675341606140137}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.215099334716797}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9732232093811035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.266709327697754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.39124179711606, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:52], Iteration [110/200000], D/loss_real: -7.3892, D/loss_fake: 6.8198, D/loss_cls: 3.4426, D/loss_gp: 0.2394, G/loss_fake: -2.3872, G/loss_rec: 0.5890, G/loss_cls: 3.7180\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 65, "power": 334.123}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.891907215118408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.586662292480469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.911966369515046, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.657766819000244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4655086994171143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8104851245880127}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.16166170440781, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9253411293029785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7890138626098633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5714194774627686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4397170543670654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3061275482177734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.19806942051031, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:56], Iteration [120/200000], D/loss_real: -3.3827, D/loss_fake: 2.3301, D/loss_cls: 3.2650, D/loss_gp: 0.0094, G/loss_fake: -2.0686, G/loss_rec: 0.5374, G/loss_cls: 3.3515\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 65, "power": 169.784}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.7322309017181396}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8100779056549072}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.866877037160318, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2246744632720947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.062577247619629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.829522132873535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.44917296474896, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.02593994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9557509422302246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9333386421203613}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9968159198760986}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1741955280303955}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.375884373788544, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:59], Iteration [130/200000], D/loss_real: -1.7741, D/loss_fake: 0.9947, D/loss_cls: 3.2477, D/loss_gp: 0.0706, G/loss_fake: -0.8968, G/loss_rec: 0.5320, G/loss_cls: 3.3663\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [131, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0910840034484863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 66, "power": 255.536}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5162806510925293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.87960304761794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.299203634262085}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.015902519226074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.237825393676758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.484916351267735, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.8740644454956055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.552095890045166}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8658714294433594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7497985363006592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 30.080514907836914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.99477908705211, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:03], Iteration [140/200000], D/loss_real: -2.0696, D/loss_fake: -3.6663, D/loss_cls: 3.2674, D/loss_gp: 3.2549, G/loss_fake: -0.7464, G/loss_rec: 0.5161, G/loss_cls: 3.6752\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.99, "temperature": 66, "power": 340.009}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.808368682861328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.929910659790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.878520593531135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.430485725402832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.594335556030273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.7370452880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.75020789758538, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.825671195983887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.1161041259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.3924126625061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4898860454559326}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 66, "power": 322.086}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.222084999084473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.39710187058021, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:07], Iteration [150/200000], D/loss_real: -1.3265, D/loss_fake: 1.0279, D/loss_cls: 3.3112, D/loss_gp: 0.1209, G/loss_fake: 0.0970, G/loss_rec: 0.5125, G/loss_cls: 3.3375\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [151, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8730874061584473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0204620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.888466752156187, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5442347526550293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.678504228591919}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.496781587600708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.796922529025636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.735168933868408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4906234741210938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3690757751464844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2695112228393555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.13275146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.210154106123014, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:10], Iteration [160/200000], D/loss_real: -1.4066, D/loss_fake: 0.2647, D/loss_cls: 3.2157, D/loss_gp: 0.0059, G/loss_fake: -0.1605, G/loss_rec: 0.5019, G/loss_cls: 3.3922\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.98, "temperature": 66, "power": 334.978}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.413844347000122}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.094942092895508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.636172528554567, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.6632394790649414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2501144409179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9416489601135254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.94702661924998, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.342219352722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.572490215301514}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1703543663024902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.098162651062012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.3821587562561035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.355312823319956, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:14], Iteration [170/200000], D/loss_real: -1.5472, D/loss_fake: 1.2790, D/loss_cls: 3.7337, D/loss_gp: 0.1917, G/loss_fake: -0.6910, G/loss_rec: 0.5015, G/loss_cls: 3.4506\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 67, "power": 256.407}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.096679210662842}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.374026298522949}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.83637007575064, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.3860602378845215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0151114463806152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.201622724533081}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.12856364617088, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5866336822509766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2773125171661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0255954265594482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8643300533294678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7708011865615845}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 66, "power": 138.372}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.19660787413008, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:17], Iteration [180/200000], D/loss_real: -1.7849, D/loss_fake: 0.0868, D/loss_cls: 3.2189, D/loss_gp: 0.0250, G/loss_fake: -0.5626, G/loss_rec: 0.5023, G/loss_cls: 3.3373\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [181, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.343755722045898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.140600681304932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.874702510933123, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.7561750411987305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.784118413925171}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.679445743560791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.693325245534936, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5780656337738037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.79576301574707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8380093574523926}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 66, "power": 170.308}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2884669303894043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.333303689956665}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.37582664546587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:21], Iteration [190/200000], D/loss_real: -3.6277, D/loss_fake: 3.2482, D/loss_cls: 3.6963, D/loss_gp: 0.0017, G/loss_fake: -2.4053, G/loss_rec: 0.4992, G/loss_cls: 3.4864\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [191, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.6365597248077393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4680800437927246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 29.00034749122625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.201497793197632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1161792278289795}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.010755777359009}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.37009791893407, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5264806747436523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3641257286071777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.090949773788452}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8673770427703857}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 66, "power": 319.323}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.60195255279541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.31073700170208, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37439.25, 81920.0], "load": 0.98, "temperature": 67, "power": 286.441}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712629377.9413161, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/stargan.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/stargan.D1.data new file mode 100644 index 000000000..4e0b04b22 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/stargan.D1.data @@ -0,0 +1,673 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "stargan", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "tags": ["gan", "resnet", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "plan": {"method": "per_gpu"}, "argv": {"--image_size": 512, "--c_dim": 5, "--batch_size": 16}, "weight": 1.0, "name": "stargan", "tag": ["stargan", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 61, "power": 87.666, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 63, "power": 95.972, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629289.89834, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712629289.9248707}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.117162704467773}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [17747.25, 81920.0], "load": 0.91, "temperature": 65, "power": 298.557}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36659.25, 81920.0], "load": 1.0, "temperature": 60, "power": 95.484}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [18787.25, 81920.0], "load": 1.0, "temperature": 61, "power": 159.481}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.374086380004883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.837044715881348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 3.5644554129826806, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.91529655456543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.361237525939941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 70.96649621593558, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [34675.25, 81920.0], "load": 0.26, "temperature": 61, "power": 97.037}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [26905.25, 81920.0], "load": 1.0, "temperature": 65, "power": 331.144}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1690869331359863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.914985418319702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 4.771466170185791, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.957707643508911}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6458146572113037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0712966918945312}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:17], Iteration [10/200000], D/loss_real: -3.7110, D/loss_fake: 1.4733, D/loss_cls: 3.9021, D/loss_gp: 0.0407, G/loss_fake: -0.3167, G/loss_rec: 0.5357, G/loss_cls: 3.3451\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 72.16912274604834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36415.25, 81920.0], "load": 0.23, "temperature": 63, "power": 318.74}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.823209524154663}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3767096996307373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 23.101178275670144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.34702467918396}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.507729530334473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.174344539642334}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.24487256199587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.328680992126465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 25.784975051879883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.873533248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.156200885772705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.227806091308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.5605767341765, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:21], Iteration [20/200000], D/loss_real: -0.4957, D/loss_fake: 0.1084, D/loss_cls: 3.5116, D/loss_gp: 0.4103, G/loss_fake: -0.0928, G/loss_rec: 0.5292, G/loss_cls: 3.4516\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36415.25, 81920.0], "load": 0.91, "temperature": 64, "power": 306.894}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.657013893127441}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.810525894165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.654198013277302, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.105544090270996}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8010425567626953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6122111082077026}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.800994331823475, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.967618465423584}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.832473874092102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7472294569015503}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2564595937728882}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1936883926391602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.93517083544523, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:24], Iteration [30/200000], D/loss_real: -3.9503, D/loss_fake: 1.1173, D/loss_cls: 3.3091, D/loss_gp: 0.0718, G/loss_fake: -0.8881, G/loss_rec: 0.5278, G/loss_cls: 3.3618\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36415.25, 81920.0], "load": 0.92, "temperature": 65, "power": 162.736}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1719605922698975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9717473387718201}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.68325765269247, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6757186055183411}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9522519707679749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4613873958587646}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.614085749820276, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8069730997085571}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7081425189971924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.232840895652771}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.91, "temperature": 66, "power": 280.865}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2449036836624146}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9748879075050354}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.592685792627165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:28], Iteration [40/200000], D/loss_real: -3.2386, D/loss_fake: 0.9107, D/loss_cls: 3.2716, D/loss_gp: 0.0031, G/loss_fake: -1.3798, G/loss_rec: 0.5149, G/loss_cls: 3.3843\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.420103073120117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3475394248962402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.70986224661781, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.60659658908844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4509639739990234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1284641027450562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.644120118687525, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4597594738006592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1742852926254272}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9048216342926025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.713738203048706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.91, "temperature": 66, "power": 334.445}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6789039969444275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.79940390988343, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:32], Iteration [50/200000], D/loss_real: -3.7488, D/loss_fake: 0.9154, D/loss_cls: 3.2423, D/loss_gp: 0.0270, G/loss_fake: -0.3506, G/loss_rec: 0.5319, G/loss_cls: 3.5562\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.902226209640503}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9809566736221313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.747939275842857, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6468143463134766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.124035835266113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.118898868560791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.607951681487386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.307922601699829}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.170870065689087}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8327711820602417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5237656831741333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.36648387998694, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3801943063735962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.39, "temperature": 62, "power": 95.198}}}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:35], Iteration [60/200000], D/loss_real: -4.6709, D/loss_fake: 2.4669, D/loss_cls: 3.3222, D/loss_gp: 0.0262, G/loss_fake: -2.0183, G/loss_rec: 0.5316, G/loss_cls: 3.3560\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7969355583190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.25202060681636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.578590989112854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3615702390670776}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1795843839645386}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.042553186416626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 69.85708675394972, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9258793592453003}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1267526149749756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.51273731649128, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8131579160690308}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9471697807312012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5509424209594727}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:39], Iteration [70/200000], D/loss_real: -4.7812, D/loss_fake: 2.8295, D/loss_cls: 3.3751, D/loss_gp: 0.0128, G/loss_fake: -2.7447, G/loss_rec: 0.5448, G/loss_cls: 3.3603\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.93, "temperature": 65, "power": 320.684}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.80185680724953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.232140064239502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3883509635925293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7699084281921387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.853001117706299}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.51573540545772, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.971715211868286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9068543910980225}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.63186522382633, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.544017791748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3620781898498535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 66, "power": 239.438}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.164822578430176}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:42], Iteration [80/200000], D/loss_real: -3.9252, D/loss_fake: 2.8151, D/loss_cls: 3.2651, D/loss_gp: 0.0010, G/loss_fake: -2.7782, G/loss_rec: 0.6003, G/loss_cls: 3.3240\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.59758792877036, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.117978811264038}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.662733554840088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.45750093460083}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0194272994995117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1394569873809814}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.79298465533052, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.878525972366333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 66, "power": 232.792}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.077016830444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.646314769994685, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0557446479797363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9501938819885254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4241585731506348}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:46], Iteration [90/200000], D/loss_real: -3.4396, D/loss_fake: 2.5225, D/loss_cls: 3.2247, D/loss_gp: 0.0117, G/loss_fake: -1.1645, G/loss_rec: 0.5763, G/loss_cls: 3.3358\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.71005179523523, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.929978847503662}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9877097606658936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.885918140411377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.233335971832275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.6596221923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.61690693395299, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3517282009124756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0608034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.751354315698542, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 67, "power": 302.538}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9199769496917725}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6425728797912598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4831550121307373}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:50], Iteration [100/200000], D/loss_real: -3.5094, D/loss_fake: 2.6446, D/loss_cls: 3.2567, D/loss_gp: 0.0091, G/loss_fake: -2.0861, G/loss_rec: 0.5683, G/loss_cls: 3.3394\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.763006253193325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.254141092300415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.121814489364624}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8270702362060547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.576064109802246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.336366653442383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.70499226734117, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9559030532836914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.721940755844116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.67879578618367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3072452545166016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.91, "temperature": 66, "power": 142.599}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0070741176605225}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7819263935089111}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:53], Iteration [110/200000], D/loss_real: -4.3040, D/loss_fake: 2.7254, D/loss_cls: 3.2333, D/loss_gp: 0.0127, G/loss_fake: -2.2234, G/loss_rec: 0.5506, G/loss_cls: 3.3515\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.666624072700834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.279218912124634}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1437339782714844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1855006217956543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6592161655426025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7802999019622803}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.75500591999918, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.030146360397339}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.645211696624756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.7925164074942, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.586724281311035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7555782794952393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8121533393859863}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.98, "temperature": 66, "power": 300.5}}}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:57], Iteration [120/200000], D/loss_real: -2.6380, D/loss_fake: 1.9244, D/loss_cls: 3.2169, D/loss_gp: 0.0309, G/loss_fake: -2.3458, G/loss_rec: 0.5360, G/loss_cls: 3.3212\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.55651245725928, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4266104698181152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0893189907073975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7753570079803467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.488940954208374}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2048962116241455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.84155744069488, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.884321928024292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.048192024230957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.716190399839597, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.94441294670105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.028721809387207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.537080764770508}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:00], Iteration [130/200000], D/loss_real: -0.1760, D/loss_fake: -0.3434, D/loss_cls: 3.2205, D/loss_gp: 0.0836, G/loss_fake: 3.5118, G/loss_rec: 0.5229, G/loss_cls: 3.9674", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.63130321583545, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.523904800415039}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 68, "power": 307.995}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.030285358428955}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7215495109558105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.005046844482422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.571880340576172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.733938712246015, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.694963455200195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.498873710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.719580476143435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.15371036529541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.867981910705566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.373290538787842}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:04], Iteration [140/200000], D/loss_real: -0.1060, D/loss_fake: -0.3454, D/loss_cls: 3.3459, D/loss_gp: 0.1479, G/loss_fake: 1.3458, G/loss_rec: 0.5106, G/loss_cls: 5.6234\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.69426195990676, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.785976886749268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.99, "temperature": 68, "power": 301.652}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.899368286132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.787163734436035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.610788345336914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.186668872833252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.50312921277573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.649548530578613}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.467691421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.70854004603594, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3241255283355713}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.481044054031372}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2709767818450928}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:07], Iteration [150/200000], D/loss_real: -0.5647, D/loss_fake: 0.4880, D/loss_cls: 3.2824, D/loss_gp: 0.0065, G/loss_fake: -0.4803, G/loss_rec: 0.5225, G/loss_cls: 3.3130\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.63585633763892, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.234339714050293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2089760303497314}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 68, "power": 208.809}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1807303428649902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1463265419006348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.116842746734619}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.685289415939764, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.13222074508667}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.073216199874878}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.62457249268071, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9962058067321777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9162726402282715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8041117191314697}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:11], Iteration [160/200000], D/loss_real: -1.3308, D/loss_fake: 0.8458, D/loss_cls: 3.2600, D/loss_gp: 0.0029, G/loss_fake: -0.8692, G/loss_rec: 0.5098, G/loss_cls: 3.4413\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.27926133142881, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.020336389541626}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.470144033432007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.49948263168335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 68, "power": 132.1}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8546977043151855}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.809318780899048}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.71957879590203, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4129638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.893899440765381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.680114297246252, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8180296421051025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.719712257385254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6318507194519043}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:15], Iteration [170/200000], D/loss_real: -1.4690, D/loss_fake: 0.7819, D/loss_cls: 3.2486, D/loss_gp: 0.0070, G/loss_fake: -0.7509, G/loss_rec: 0.5054, G/loss_cls: 3.3286\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.44526405996381, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.160916328430176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9644196033477783}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.897082805633545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7842605113983154}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.91, "temperature": 68, "power": 161.718}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6926417350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.687705220081845, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3679590225219727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2222020626068115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.64188876858947, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0122861862182617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8562798500061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6610898971557617}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:18], Iteration [180/200000], D/loss_real: -1.1449, D/loss_fake: 0.5474, D/loss_cls: 3.2245, D/loss_gp: 0.0034, G/loss_fake: -0.5842, G/loss_rec: 0.5028, G/loss_cls: 3.3265\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.66980212099391, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4245619773864746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.107537031173706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 68, "power": 383.241}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.822556257247925}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9476959705352783}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.807695150375366}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.67183919510209, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4102208614349365}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2997803688049316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.719820988853176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.601500511169434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.485925197601318}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.328277587890625}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:22], Iteration [190/200000], D/loss_real: -0.9758, D/loss_fake: 0.7780, D/loss_cls: 3.6351, D/loss_gp: 0.0891, G/loss_fake: -0.9418, G/loss_rec: 0.4990, G/loss_cls: 3.3296", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.66257802116616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4807090759277344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 1.0, "temperature": 69, "power": 339.644}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2192721366882324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2817349433898926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2091140747070312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.094062328338623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.688902797751275, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0957794189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.047666311264038}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.686056105070627, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [37439.25, 81920.0], "load": 0.92, "temperature": 69, "power": 351.806}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--image_size", "512", "--c_dim", "5", "--batch_size", "16"], "time": 1712629378.7320774, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/super-slomo.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/super-slomo.D0.data new file mode 100644 index 000000000..65ad09c8e --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/super-slomo.D0.data @@ -0,0 +1,394 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 1.0, "memory": 0.010771942138671876}, "temperature": 62, "power": 91.418, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 64, "power": 100.259, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629381.710768, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712629384.8112764}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1469.25, 81920.0], "load": 0, "temperature": 57, "power": 84.351}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1469.25, 81920.0], "load": 0, "temperature": 56, "power": 83.959}}}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 328.314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0, "temperature": 56, "power": 83.318}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3016052246094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2931213378906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2869873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2837829589844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2828063964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28228759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.82, "temperature": 61, "power": 286.13}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28192138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2815856933594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.087945408210025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28094482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2806396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.26938122884483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28033447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 61, "power": 147.165}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2799987792969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.89666566602712, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27960205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2791748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.351585395040665, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2787170410156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.67760310773834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2777404785156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 60, "power": 255.598}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.35016382405005, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2771301269531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2764587402344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.676974672658105, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2757568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2752685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.64710027320806, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.274658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.81, "temperature": 62, "power": 210.463}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.68384607083844, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2738342285156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27337646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.17468823347984, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27252197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.60142222114996, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2713623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2707214355469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.54678084096477, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 61, "power": 238.446}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.26995849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.857878938022246, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2691345214844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.268310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.217828582279296, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2674560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2664489746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.52458998465754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2654724121094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 63, "power": 288.417}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.26446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.18197776103379, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2632751464844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.57866982195738, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2620849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.26104736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.17160775822564, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2598876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.81, "temperature": 63, "power": 353.281}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2581787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.68128756853614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2569274902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2553405761719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.87941047582586, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.39245236232251, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25177001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.15671940398074, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 63, "power": 95.56}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2460021972656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2420654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.193313333401974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2430114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.385896657749825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2450256347656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.24798583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.17167364529753, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.24566650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 64, "power": 345.989}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.245849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.261666746807556, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2398681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2358703613281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.15941518617261, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2667236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.21452265507543, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2394104003906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.8, "temperature": 64, "power": 325.959}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2447814941406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.77032475848589, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2505187988281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.56352154722965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2515869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.1264911087421, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2521057128906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.79, "temperature": 64, "power": 245.608}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25201416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.94913305202577, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2513122558594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2497863769531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.296746092818246, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2498474121094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 65, "power": 298.578}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25042724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.56179126633293, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2477111816406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.39305667607629, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2473449707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2450866699219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.8042746823275, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2394104003906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 263.962}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.24456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.86290672772089, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2358093261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.57165805418152, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.23907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.23704618194818, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.23529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.23101806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.11609515444215, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.8, "temperature": 65, "power": 345.237}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2321472167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2308654785156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.5749592504829, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2259216308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.36551648401259, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2241516113281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.5816936702813, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2201232910156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 299.202}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.22064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.142514856952054, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2171936035156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2168884277344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.68935618019227, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2135314941406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.21484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.69229896500827, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 332.167}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.20989990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.42152324598281, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2107849121094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.20733642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.05395537500534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2071228027344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2040710449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.74597140156746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.82, "temperature": 66, "power": 228.341}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.20159912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.61164663446779, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19964599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.12989647007101, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1986083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1972351074219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.96336178257838, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19512939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1934814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 292.096}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.1758629442192, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1920471191406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1861572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.95273802977029, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.18524169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.16502082275099, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1852722167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1785583496094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.11348093728789, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 67, "power": 289.258}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.178466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1761474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.855582307709604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1727294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.3702232784772, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.16827392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.9201588933342, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1625671386719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.81, "temperature": 66, "power": 314.71}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.15899658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.15305862821272, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1564636230469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.150634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.6192501283298, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33813.25, 81920.0], "load": 0.81, "temperature": 67, "power": 346.747}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712629473.4283774, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/super-slomo.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/super-slomo.D1.data new file mode 100644 index 000000000..f9f330d1e --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/super-slomo.D1.data @@ -0,0 +1,390 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "super-slomo", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "tags": ["convnet", "unet", "video-interpolation", "vision"], "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "plan": {"method": "per_gpu"}, "argv": {"--train_batch_size": 32}, "weight": 1.0, "name": "super-slomo", "tag": ["super-slomo", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 60, "power": 86.982, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 62, "power": 94.384, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712629384.792975, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712629384.8191264}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1469.25, 81920.0], "load": 0, "temperature": 59, "power": 91.423}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1469.25, 81920.0], "load": 0, "temperature": 58, "power": 90.886}}}, "pipe": "data"} +{"event": "line", "data": "/mnt/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 328.42889404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4019470214844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3818054199219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 0.83, "temperature": 62, "power": 268.955}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3659362792969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3523864746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.33990478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3294372558594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3222961425781}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 0.98, "temperature": 63, "power": 198.587}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3189697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3183288574219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.81908579949957, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3184509277344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.318603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.13576826765613, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31854248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3183288574219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.48103374749389, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 64, "power": 278.745}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.318115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3177490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.992298004011246, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31817626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.48841187811478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31707763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.24153255762038, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31671142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 64, "power": 326.425}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3163757324219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.48503189534554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3159484863281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3153991699219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.06528407727076, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3147277832031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.48020247328055, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3139343261719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 0.8, "temperature": 64, "power": 243.459}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3127746582031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.095595536233596, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3109436035156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30816650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.74263246923156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30621337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3096618652344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.80875659442681, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30340576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 254.728}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.36318771242276, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3039855957031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3037414550781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.12205039979401, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3025817871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.29974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.26836275446499, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2944641113281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28912353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.27521218681221, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 65, "power": 285.611}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2826232910156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.23911226731798, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27325439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2550048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.58164179082658, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2502136230469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.65579244074322, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 65, "power": 353.044}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2669982910156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.54303693485584, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28228759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.281423833272655, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2845764160156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2864074707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.449658625047974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2878723144531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 275.363}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28900146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.972263033535924, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2895812988281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.05282026842934, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28912353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28875732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.95544014365797, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2891540527344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 315.375}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.36374540751263, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2870178222656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.851254352219826, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28546142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2846984863281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.0959194906457, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28363037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28216552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.967016106241715, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 272.446}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2780456542969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.82612958808444, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27532958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.28249213671658, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2734680175781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 0.8, "temperature": 67, "power": 356.199}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.17657889695336, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2672424316406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2637939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.82002152114478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.38924310184442, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.25506591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 67, "power": 251.271}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2492370605469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.78658048037923, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2444152832031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.29808230464789, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2325439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.555293335275344, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2244567871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2205810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 0.79, "temperature": 67, "power": 322.614}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.89634099920612, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2130432128906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2096862792969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.67726259514242, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2118835449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.21502685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.53876678896817, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2182922363281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 66, "power": 232.592}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.05918526169607, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2178955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2147216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.186487890259194, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2153015136719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2169189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.5567857551267, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2147521972656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.21527099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.571415755764995, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 67, "power": 267.712}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2117614746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.209163259000626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2107238769531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.20758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.38234018714726, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2027587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.91451010044101, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2006530761719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 0.81, "temperature": 67, "power": 343.211}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.007408240687184, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2017517089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1966552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.910867171751335, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1948547363281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.81655669898627, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1966552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.85935874090279, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 67, "power": 361.059}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2039489746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1954650878906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.11410606731622, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1943664550781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1966247558594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.5727539018504, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.09284125010763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.197998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 68, "power": 272.092}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.19439697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.443095510614334, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1966857910156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1904602050781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.18298245349382, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.189697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 35.94349154310804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.18255615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 68, "power": 347.764}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1829528808594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.9557229826602, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1755065917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1702880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.29405445402994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.16314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.96384720137699, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1549377441406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.1500244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.98760955751266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 69, "power": 350.926}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [33813.25, 81920.0], "load": 1.0, "temperature": 68, "power": 351.732}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D1-0efae956f1553a76c1e03985181900f5.json", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1712629471.8460057, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/t5.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/t5.D0.data new file mode 100644 index 000000000..2a31cf81f --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/t5.D0.data @@ -0,0 +1,585 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 97.411, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 110.532, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628355.690919, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712628358.7914422}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.970378875732422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.800580024719238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.727293014526367}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 68, "power": 186.229}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.682456970214844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.654926300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.628446578979492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.61573600769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.587346076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.556131362915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.513788223266602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.457639694213867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.321690559387207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.367353439331055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.313187599182129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 70, "power": 305.33}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.28883171081543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.212865829467773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.028278350830078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.84654426574707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.015433311462402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.654990196228027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.09386410412216, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.576047897338867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.431913375854492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35800552368164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 71, "power": 296.626}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.1990449973364, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.306499481201172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.0736665725708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.939047813415527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.910099029541016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.98101640272451, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.83672046661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.653719902038574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.481781959533691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.35232119843157, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.48547649383545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.421192169189453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.146132469177246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 285.994}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.339488517898126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.017644882202148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.908407211303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.934696197509766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.41701663579036, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.747555732727051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.655747413635254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.503643035888672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.347489356994629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.96363473383814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.31602668762207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231689929962158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05312442779541}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 260.735}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.09682105601212, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9287261962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817322731018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.710197448730469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.67908654671733, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.60150671005249}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.518242359161377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.411893844604492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.77129725518438, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.325069427490234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.233058929443359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.172609329223633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 70, "power": 294.369}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.097485542297363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.5667196972169, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.041872501373291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.932075023651123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.845073699951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.863225684657834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.775830268859863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.705239295959473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.633525848388672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.83815419993368, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.575502395629883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5167365074157715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.423802852630615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 70, "power": 269.892}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.912794990916815, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.354887962341309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.272907257080078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.22577428817749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.148532390594482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.7547508091925, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.052487373352051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.000744819641113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.897304534912109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.60362602443348, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.836282253265381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.77700138092041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.716606140136719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 70, "power": 306.834}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.317553835254905, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.660195827484131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.5612311363220215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.502846717834473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.04298415664623, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.482702732086182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.492550373077393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.360192775726318}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2910284996032715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.72063049678099, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2590203285217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.185894012451172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 69, "power": 319.093}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.096889972686768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.619040816702004, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.042828559875488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9600980281829834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8612828254699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.440912234939894, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.813199996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.774353265762329}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.680110216140747}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.99769951939742, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.641136646270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5967588424682617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.522066354751587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 315.125}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.439159870147705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.47298028781145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3564889430999756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3717949390411377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.323021650314331}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.77838891261446, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2349772453308105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.192401647567749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1241087913513184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.60644088444255, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0823092460632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.988307476043701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 263.6}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9341442584991455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.95437398481953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8254554271698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.76162052154541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.763589859008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7407455444335938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.710093386633496, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.672053337097168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.585357189178467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5391626358032227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.60890554194444, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4440035820007324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3648884296417236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 308.491}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2605745792388916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.57539616352493, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2033910751342773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1607017517089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.115994691848755}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.03342497270329, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.066122055053711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.993964672088623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0019028186798096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9345535039901733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.5030036835796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8910869359970093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 308.162}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.861147165298462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8138902187347412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.902777238886124, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7461824417114258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6692562103271484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.625261664390564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.7312606145442, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5834534168243408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5272022485733032}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.459859013557434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.083110798198064, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4157166481018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3812469244003296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 274.512}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3119561672210693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2564345598220825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.76669678896762, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2277307510375977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1866565942764282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1411606073379517}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.654995501586406, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.083279013633728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0341026782989502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9987545013427734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.64848232343205, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9641054272651672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 70, "power": 323.509}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9365309476852417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8940635919570923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.05549333833567, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8575931787490845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8317781686782837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7994922995567322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7648830413818359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.5043554925025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7411973476409912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7056658267974854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6834872364997864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.90261530079427, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6573166847229004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 70, "power": 288.122}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6341385245323181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6199974417686462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.71844956286858, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5888203978538513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5731779932975769}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5541250109672546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.01937381380773, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.536582350730896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5135989785194397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4957291781902313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47861233353614807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.520353360872384, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 71, "power": 275.329}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47117674350738525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4504289925098419}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.43357139825820923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.851139909820795, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4255506992340088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.41500476002693176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4001453220844269}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.95035354081807, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3856187164783478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.37681058049201965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3642743229866028}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.11873703612618, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3536361753940582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 71, "power": 280.102}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.34521564841270447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3393672704696655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3293442130088806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.608792572624985, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.32399898767471313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3113195300102234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3049860894680023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.501655815760145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2919404208660126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.28530269861221313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2786383628845215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.997505515357986, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 71, "power": 298.584}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.27610689401626587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2684989273548126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2630639374256134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2509020268917084}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.11835694979252, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24854587018489838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24656544625759125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23619654774665833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.99856486825351, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23337531089782715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22808784246444702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22517065703868866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.92881027215602, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 297.072}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21650603413581848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21727493405342102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21555562317371368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.51754152365656, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20680122077465057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20380568504333496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2012479454278946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1972467303276062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.34690437653478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1940830945968628}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1896078884601593}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18643631041049957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 71, "power": 239.574}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.85074011485873, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18215446174144745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17853213846683502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17645688354969025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.831013512609616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1725698709487915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16997073590755463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16654980182647705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.33073530917256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1624709963798523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16435472667217255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16150528192520142}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15788640081882477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.53480402724219, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 314.75}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15364141762256622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15300226211547852}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1467810720205307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.934092596284884, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14508351683616638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14274908602237701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14380408823490143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.80449217531913, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14112578332424164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1418134719133377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13803264498710632}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 307.517}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.21070815342267, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 278.943}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712628434.0195384, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/t5.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/t5.D1.data new file mode 100644 index 000000000..5b346abc1 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/t5.D1.data @@ -0,0 +1,585 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "T5", "--batch-size": 16}, "plan": {"method": "per_gpu"}, "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "weight": 2.0, "name": "t5", "tag": ["t5", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 95.057, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 104.546, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628358.774121, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712628358.7997813}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.970378875732422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.800580024719238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.727293014526367}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 70, "power": 267.743}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.682456970214844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.654926300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.628446578979492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.61573600769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.587346076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.556131362915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.513788223266602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.457639694213867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.321690559387207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.367353439331055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.313187599182129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 311.107}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.28883171081543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.212865829467773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.028278350830078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.84654426574707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.015433311462402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.654990196228027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.059836908144426, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.576047897338867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.431913375854492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35800552368164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 73, "power": 320.724}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.99642881284825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.306499481201172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.0736665725708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.939047813415527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.910099029541016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.511842096507074, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.83672046661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.653719902038574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.481781959533691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.78368861175754, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.48547649383545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.421192169189453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.146132469177246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.28407000706103, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 300.496}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.017644882202148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.908407211303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.934696197509766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.83332849096231, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.747555732727051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.655747413635254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.503643035888672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.347489356994629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.16114650992588, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.31602668762207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231689929962158}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 287.423}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05312442779541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.34805899330237, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9287261962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817322731018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.710197448730469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.640296136368406, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.60150671005249}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.518242359161377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.411893844604492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.746932031963155, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.325069427490234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.233058929443359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.172609329223633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 238.677}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.097485542297363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.47451875861068, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.041872501373291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.932075023651123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.845073699951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.51220222647899, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.775830268859863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.705239295959473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.633525848388672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.37691699316138, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.575502395629883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5167365074157715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.423802852630615}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 267.521}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.700017269958266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.354887962341309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.272907257080078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.22577428817749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.148532390594482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.60039891556745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.052487373352051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.000744819641113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.897304534912109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.36864617439128, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.836282253265381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.77700138092041}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 284.422}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.716606140136719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.35805231864255, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.660195827484131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.5612311363220215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.502846717834473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.99471154706265, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.482702732086182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.492550373077393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.360192775726318}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2910284996032715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.896749444216404, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2590203285217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.185894012451172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 71, "power": 262.429}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.096889972686768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.302709421382886, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.042828559875488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9600980281829834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8612828254699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.98040413122715, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.813199996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.774353265762329}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.680110216140747}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.698497510574605, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.641136646270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5967588424682617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.522066354751587}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 231.314}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.439159870147705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.08868710434967, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3564889430999756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3717949390411377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.323021650314331}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.41143802554398, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2349772453308105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.192401647567749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1241087913513184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.5226293429665, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0823092460632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.988307476043701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 314.817}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9341442584991455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.67608666592505, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8254554271698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.76162052154541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.763589859008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7407455444335938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.911959809459766, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.672053337097168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.585357189178467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5391626358032227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.512270677597144, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4440035820007324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3648884296417236}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 232.141}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2605745792388916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.96174317167389, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2033910751342773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1607017517089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.115994691848755}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.77710009757197, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.066122055053711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.993964672088623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0019028186798096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9345535039901733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.69221155237575, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8910869359970093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 72, "power": 305.641}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.861147165298462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8138902187347412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.36303075458598, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7461824417114258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6692562103271484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.625261664390564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.385061918953426, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5834534168243408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5272022485733032}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.459859013557434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.70465585101718, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4157166481018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3812469244003296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 278.282}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3119561672210693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2564345598220825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.66524428438812, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2277307510375977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1866565942764282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1411606073379517}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.54266512152416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.083279013633728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0341026782989502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9987545013427734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.313358656652774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9641054272651672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 72, "power": 306.992}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9365309476852417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8940635919570923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.87638509646861, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8575931787490845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8317781686782837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7994922995567322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7648830413818359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.723709904897156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7411973476409912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7056658267974854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6834872364997864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.43399139802172, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6573166847229004}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 73, "power": 328.604}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6341385245323181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6199974417686462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.52137983963779, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5888203978538513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5731779932975769}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5541250109672546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.80508152707047, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.536582350730896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5135989785194397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4957291781902313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47861233353614807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.693472928690696, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 73, "power": 315.582}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47117674350738525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4504289925098419}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.43357139825820923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.33159515835027, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4255506992340088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.41500476002693176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4001453220844269}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.442339157877704, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3856187164783478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.37681058049201965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3642743229866028}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.793679165970666, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3536361753940582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 73, "power": 317.128}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.34521564841270447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3393672704696655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3293442130088806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.71145437579167, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.32399898767471313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3113195300102234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3049860894680023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.49720210373687, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2919404208660126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.28530269861221313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2786383628845215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 73, "power": 320.079}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.64089210249715, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.27610689401626587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2684989273548126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2630639374256134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.731937839263644, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2509020268917084}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24854587018489838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24656544625759125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23619654774665833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.441355633238416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23337531089782715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22808784246444702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22517065703868866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.5428081778069, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.98, "temperature": 74, "power": 309.039}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21650603413581848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21727493405342102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21555562317371368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.542521265970386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20680122077465057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20380568504333496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2012479454278946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.74623703358478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1972467303276062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1940830945968628}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1896078884601593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 74, "power": 301.933}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18643631041049957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.8897010391878, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18215446174144745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17853213846683502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17645688354969025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.471308500906076, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1725698709487915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16997073590755463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16654980182647705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.3160232221544, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1624709963798523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16435472667217255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16150528192520142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 73, "power": 274.698}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.91113816020368, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15788640081882477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15364141762256622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15300226211547852}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1467810720205307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.07283326990232, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14508351683616638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14274908602237701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14380408823490143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.93712890110383, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14112578332424164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1418134719133377}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 1.0, "temperature": 73, "power": 295.34}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13803264498710632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.91615472182591, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [35457.25, 81920.0], "load": 0.85, "temperature": 72, "power": 279.218}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "T5", "--batch-size", "16"], "time": 1712628434.1183317, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/tf32.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/tf32.D0.data new file mode 100644 index 000000000..1ca937c96 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/tf32.D0.data @@ -0,0 +1,116 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 65, "power": 92.373, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 102.956, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627493.196637, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712627496.3201938}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 118.60417196274433, "units": "Tflops", "t": 1712627498.2324815}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 61, "power": 87.254}}, "t": 1712627497.6910694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 0.27, "temperature": 64, "power": 89.232}}, "t": 1712627498.2092075}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.5972684241792, "units": "Tflops", "t": 1712627498.3966658}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.43767233287002, "units": "Tflops", "t": 1712627498.5615244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.48265759636382, "units": "Tflops", "t": 1712627498.7250948}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.49363921425484, "units": "Tflops", "t": 1712627498.8886724}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 128.72042622501058, "units": "Tflops", "t": 1712627499.0595603}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.08682517288565, "units": "Tflops", "t": 1712627499.2260928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.77811158780622, "units": "Tflops", "t": 1712627499.3905199}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 301.618}}, "t": 1712627499.3774428}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.39291091572005, "units": "Tflops", "t": 1712627499.554263}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.4424723248117, "units": "Tflops", "t": 1712627499.7182257}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.12224272856102, "units": "Tflops", "t": 1712627499.8822439}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 130.02037044872733, "units": "Tflops", "t": 1712627500.051443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 306.955}}, "t": 1712627499.9104989}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.1197834166361, "units": "Tflops", "t": 1712627500.2194135}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.12068246911082, "units": "Tflops", "t": 1712627500.3834262}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.49069753360712, "units": "Tflops", "t": 1712627500.5469947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.3894275730955, "units": "Tflops", "t": 1712627500.7119055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.40505505437343, "units": "Tflops", "t": 1712627500.8767986}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.68739585371733, "units": "Tflops", "t": 1712627501.0439627}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.74834177559225, "units": "Tflops", "t": 1712627501.210941}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 290.923}}, "t": 1712627501.2072651}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.05596369973685, "units": "Tflops", "t": 1712627501.375108}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.30445731938906, "units": "Tflops", "t": 1712627501.538896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.10722090685866, "units": "Tflops", "t": 1712627501.704223}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.22065287265434, "units": "Tflops", "t": 1712627501.869367}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 307.926}}, "t": 1712627501.7380395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.3932167807307, "units": "Tflops", "t": 1712627502.0360162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.74834177559225, "units": "Tflops", "t": 1712627502.20299}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2637692180013, "units": "Tflops", "t": 1712627502.3680723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.65617472785516, "units": "Tflops", "t": 1712627502.5327904}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.275130399387, "units": "Tflops", "t": 1712627502.6978452}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.831298354343, "units": "Tflops", "t": 1712627502.8622189}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.08890596552587, "units": "Tflops", "t": 1712627503.0287902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.02707765499673, "units": "Tflops", "t": 1712627503.195404}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 289.747}}, "t": 1712627503.0315259}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.69414710335874, "units": "Tflops", "t": 1712627503.3600326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.07457295872698, "units": "Tflops", "t": 1712627503.5254602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.42705549165194, "units": "Tflops", "t": 1712627503.6903226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 308.034}}, "t": 1712627503.5565455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.25664539753285, "units": "Tflops", "t": 1712627503.8555002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.10328414244142, "units": "Tflops", "t": 1712627504.022018}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.82180644849961, "units": "Tflops", "t": 1712627504.1877906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 66, "power": 297.353}}, "t": 1712627504.0829027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.3151988352161, "units": "Tflops", "t": 1712627504.3528903}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.36416124591742, "units": "Tflops", "t": 1712627504.5178337}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.2554902551701, "units": "Tflops", "t": 1712627504.682914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.07994916609832, "units": "Tflops", "t": 1712627504.8483405}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.7657876934948, "units": "Tflops", "t": 1712627505.014038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 297.823}}, "t": 1712627504.878817}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.32369918776604, "units": "Tflops", "t": 1712627505.1803703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.92057565639445, "units": "Tflops", "t": 1712627505.3458614}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.92943594291023, "units": "Tflops", "t": 1712627505.510112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 291.23}}, "t": 1712627505.4076989}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.117402660722, "units": "Tflops", "t": 1712627505.675584}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.39302674243865, "units": "Tflops", "t": 1712627505.8417482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.06113434051738, "units": "Tflops", "t": 1712627506.0070646}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 301.711}}, "t": 1712627505.9380245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.70206286623957, "units": "Tflops", "t": 1712627506.1741319}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.96675657894033, "units": "Tflops", "t": 1712627506.3395777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.14075778163163, "units": "Tflops", "t": 1712627506.5062134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.9805595952464, "units": "Tflops", "t": 1712627506.6716409}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.0521127911943, "units": "Tflops", "t": 1712627506.8369668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 292.202}}, "t": 1712627506.732608}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.67946229590765, "units": "Tflops", "t": 1712627507.0028403}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.3440150842097, "units": "Tflops", "t": 1712627507.1690574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.53780382171095, "units": "Tflops", "t": 1712627507.3351917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 298.024}}, "t": 1712627507.254518}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.35799077324816, "units": "Tflops", "t": 1712627507.500203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.12643306642883, "units": "Tflops", "t": 1712627507.6654484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.17141119984834, "units": "Tflops", "t": 1712627507.8306277}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 292.576}}, "t": 1712627507.7815886}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.16468176837492, "units": "Tflops", "t": 1712627507.99587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.3362297512038, "units": "Tflops", "t": 1712627508.1620939}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.34610398794078, "units": "Tflops", "t": 1712627508.3283167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.79965295680194, "units": "Tflops", "t": 1712627508.49285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.74343162909292, "units": "Tflops", "t": 1712627508.658562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 307.542}}, "t": 1712627508.6008003}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.6773628557895, "units": "Tflops", "t": 1712627508.8244603}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.8880191919094, "units": "Tflops", "t": 1712627508.9900017}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.11028517732797, "units": "Tflops", "t": 1712627509.1565187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 295.095}}, "t": 1712627509.1305542}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.98554472209804, "units": "Tflops", "t": 1712627509.321974}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.89165706394874, "units": "Tflops", "t": 1712627509.487515}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.3502784853184, "units": "Tflops", "t": 1712627509.6526055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.38749850832392, "units": "Tflops", "t": 1712627509.8175297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.15287502012768, "units": "Tflops", "t": 1712627509.9839795}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.31629587924851, "units": "Tflops", "t": 1712627510.150226}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.93666826922336, "units": "Tflops", "t": 1712627510.315705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.49515588137794, "units": "Tflops", "t": 1712627510.4817374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 302.399}}, "t": 1712627510.443283}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.43921674143633, "units": "Tflops", "t": 1712627510.646659}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.8788296654432, "units": "Tflops", "t": 1712627510.8122041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.56466326016832, "units": "Tflops", "t": 1712627510.9781406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 67, "power": 298.416}}, "t": 1712627510.9693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.33547025556052, "units": "Tflops", "t": 1712627511.144569}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.05575965318286, "units": "Tflops", "t": 1712627511.3098912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.29769634944196, "units": "Tflops", "t": 1712627511.4761598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.67980805923207, "units": "Tflops", "t": 1712627511.6407173}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 292.864}}, "t": 1712627511.4894822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.86523915433492, "units": "Tflops", "t": 1712627511.8063247}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.6430178176376, "units": "Tflops", "t": 1712627511.9721694}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.35009207818106, "units": "Tflops", "t": 1712627512.138374}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.5421844056144, "units": "Tflops", "t": 1712627512.3043497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 302.965}}, "t": 1712627512.2854214}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.40423993521134, "units": "Tflops", "t": 1712627512.470713}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.832901090859, "units": "Tflops", "t": 1712627512.6363332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.9756926813863, "units": "Tflops", "t": 1712627512.8030083}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 292.682}}, "t": 1712627512.802219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.31731850980253, "units": "Tflops", "t": 1712627512.968053}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712627513.963926, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/tf32.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/tf32.D1.data new file mode 100644 index 000000000..bac00cbb4 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/tf32.D1.data @@ -0,0 +1,115 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "flops", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "plan": {"method": "per_gpu"}, "tags": ["diagnostic", "flops"], "argv": {"--number": 10, "--repeat": 90, "--m": 8192, "--n": 8192, "--dtype": "fp32", "--tf32": true}, "weight": 0.0, "name": "tf32", "tag": ["tf32", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 63, "power": 90.87, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 98.628, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712627496.30956, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712627496.3207562}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 118.80323146252016, "units": "Tflops", "t": 1712627498.2564285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [882.5, 81920.0], "load": 0, "temperature": 63, "power": 95.75}}, "t": 1712627497.7258017}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 0.07, "temperature": 66, "power": 367.554}}, "t": 1712627498.245644}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.61574526459933, "units": "Tflops", "t": 1712627498.422959}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.36126876575713, "units": "Tflops", "t": 1712627498.5879226}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.51918870937297, "units": "Tflops", "t": 1712627498.752674}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.09060848099458, "units": "Tflops", "t": 1712627498.919214}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 129.04838576194348, "units": "Tflops", "t": 1712627499.089668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.80841883080282, "units": "Tflops", "t": 1712627499.2553113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.67555180516254, "units": "Tflops", "t": 1712627499.4223638}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 68, "power": 300.003}}, "t": 1712627499.377105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.43998278126782, "units": "Tflops", "t": 1712627499.5885348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.9826801985996, "units": "Tflops", "t": 1712627499.7555513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.63843982263975, "units": "Tflops", "t": 1712627499.9213963}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 305.743}}, "t": 1712627499.909998}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.65619231412896, "units": "Tflops", "t": 1712627500.0886629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.09363528352907, "units": "Tflops", "t": 1712627500.255196}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.8076488632499, "units": "Tflops", "t": 1712627500.4220855}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.26506491603487, "units": "Tflops", "t": 1712627500.5883944}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.70507188803128, "units": "Tflops", "t": 1712627500.7554138}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.953914679042, "units": "Tflops", "t": 1712627500.9208684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 130.96953071056626, "units": "Tflops", "t": 1712627501.0888295}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.51145865683176, "units": "Tflops", "t": 1712627501.2561}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 299.518}}, "t": 1712627501.207603}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.28005061024174, "units": "Tflops", "t": 1712627501.422462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.335090511008, "units": "Tflops", "t": 1712627501.5886822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.02197515193834, "units": "Tflops", "t": 1712627501.7553146}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 294.661}}, "t": 1712627501.7385375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.49839159294447, "units": "Tflops", "t": 1712627501.9213853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.43406028159322, "units": "Tflops", "t": 1712627502.088758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.35581758833405, "units": "Tflops", "t": 1712627502.256239}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.4133639340641, "units": "Tflops", "t": 1712627502.4223752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.48408707357817, "units": "Tflops", "t": 1712627502.5896711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.4215385842504, "units": "Tflops", "t": 1712627502.755782}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.01025978558002, "units": "Tflops", "t": 1712627502.9224327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.92151732738867, "units": "Tflops", "t": 1712627503.089173}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 286.665}}, "t": 1712627503.0237138}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.05643791595244, "units": "Tflops", "t": 1712627503.257069}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.22372959459682, "units": "Tflops", "t": 1712627503.4234288}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.68720783630462, "units": "Tflops", "t": 1712627503.5904791}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 305.933}}, "t": 1712627503.5506425}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.76302236802124, "units": "Tflops", "t": 1712627503.757686}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.94931519889377, "units": "Tflops", "t": 1712627503.9231408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.12817196894414, "units": "Tflops", "t": 1712627504.0908895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 69, "power": 293.985}}, "t": 1712627504.0778005}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.4683442021816, "units": "Tflops", "t": 1712627504.2582536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.94509928830865, "units": "Tflops", "t": 1712627504.4237204}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 130.7960616254584, "units": "Tflops", "t": 1712627504.5918953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.9753149988306, "units": "Tflops", "t": 1712627504.7585707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.61421985861728, "units": "Tflops", "t": 1712627504.9244556}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 307.49}}, "t": 1712627504.8808484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.36236543633927, "units": "Tflops", "t": 1712627505.0919597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.56773741797534, "units": "Tflops", "t": 1712627505.2591484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.92698947193824, "units": "Tflops", "t": 1712627505.4258895}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 299.905}}, "t": 1712627505.4087129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.6567561023118, "units": "Tflops", "t": 1712627505.5930119}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.33907793751024, "units": "Tflops", "t": 1712627505.7592435}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.76132828654005, "units": "Tflops", "t": 1712627505.9261863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.8489128850886, "units": "Tflops", "t": 1712627506.0930254}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 307.489}}, "t": 1712627505.9361227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.49739648871738, "units": "Tflops", "t": 1712627506.2603757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.04200946653881, "units": "Tflops", "t": 1712627506.4271443}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.36292671082913, "units": "Tflops", "t": 1712627506.5945966}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.64778689848208, "units": "Tflops", "t": 1712627506.7604246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 306.437}}, "t": 1712627506.740078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.57486949826927, "units": "Tflops", "t": 1712627506.9276936}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.17535426234832, "units": "Tflops", "t": 1712627507.0953972}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.40316387555706, "units": "Tflops", "t": 1712627507.2627978}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.18128350766824, "units": "Tflops", "t": 1712627507.429219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 291.142}}, "t": 1712627507.2629893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.77186994577863, "units": "Tflops", "t": 1712627507.5961971}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.23301831022647, "units": "Tflops", "t": 1712627507.7625468}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.79069853332535, "units": "Tflops", "t": 1712627507.9294643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 300.888}}, "t": 1712627507.7930548}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.56098142351678, "units": "Tflops", "t": 1712627508.0967124}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.20558368488565, "units": "Tflops", "t": 1712627508.2643614}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.30623630054922, "units": "Tflops", "t": 1712627508.4306173}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.42076368377613, "units": "Tflops", "t": 1712627508.5979934}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.22752074947567, "units": "Tflops", "t": 1712627508.764358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 305.152}}, "t": 1712627508.6002922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.07208228736323, "units": "Tflops", "t": 1712627508.9323661}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.90963112922813, "units": "Tflops", "t": 1712627509.099132}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 130.82500307588447, "units": "Tflops", "t": 1712627509.2672856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 301.085}}, "t": 1712627509.1300452}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.8242930175341, "units": "Tflops", "t": 1712627509.4329445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.40316387555706, "units": "Tflops", "t": 1712627509.6003573}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.90038778915633, "units": "Tflops", "t": 1712627509.767124}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.42787983810894, "units": "Tflops", "t": 1712627509.93449}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.3182268676075, "units": "Tflops", "t": 1712627510.1019962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.21416988803608, "units": "Tflops", "t": 1712627510.2696428}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.75718831609382, "units": "Tflops", "t": 1712627510.4353454}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.39417855745432, "units": "Tflops", "t": 1712627510.6027565}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 296.309}}, "t": 1712627510.4429615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.81876311245483, "units": "Tflops", "t": 1712627510.7696822}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.78034214435925, "units": "Tflops", "t": 1712627510.9366014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.36928815694137, "units": "Tflops", "t": 1712627511.1040409}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 70, "power": 296.403}}, "t": 1712627510.968443}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.440803246552, "units": "Tflops", "t": 1712627511.2714384}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.35616963026683, "units": "Tflops", "t": 1712627511.437631}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.6383415187791, "units": "Tflops", "t": 1712627511.6047297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.7867440836208, "units": "Tflops", "t": 1712627511.771648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.86041111761273, "units": "Tflops", "t": 1712627511.9384644}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.36685576655978, "units": "Tflops", "t": 1712627512.1059096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.504145954087, "units": "Tflops", "t": 1712627512.2731924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.2997837912375, "units": "Tflops", "t": 1712627512.439634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 305.758}}, "t": 1712627512.2811463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.4887731800349, "units": "Tflops", "t": 1712627512.6070387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.60547114306186, "units": "Tflops", "t": 1712627512.7741804}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.52552383284933, "units": "Tflops", "t": 1712627512.9414246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [2179.25, 81920.0], "load": 1.0, "temperature": 71, "power": 302.556}}, "t": 1712627512.7987025}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 131.40971644478287, "units": "Tflops", "t": 1712627513.1088738}, "pipe": "data"} +{"event": "end", "data": {"command": ["/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--number", "10", "--repeat", "90", "--m", "8192", "--n", "8192", "--dtype", "fp32", "--tf32"], "time": 1712627514.0354278, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/whisper.D0.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/whisper.D0.data new file mode 100644 index 000000000..fe9bbe8b1 --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/whisper.D0.data @@ -0,0 +1,682 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D0"], "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 1.0, "memory": 0.010771942138671876}, "temperature": 65, "power": 95.351, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 102.651, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628519.726022, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712628522.778876}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 61, "power": 87.49}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 60, "power": 86.403}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 59, "power": 86.111}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 59, "power": 85.318}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6992721557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4653310775756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6924057006835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7043609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6987991333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6994171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 63, "power": 323.613}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6975479125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6967544555664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6952743530273438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6956329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6933135986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918106079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918716430664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938247680664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926803588867188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 64, "power": 281.558}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 233.20355353145666, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926116943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917190551757812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.15254374030908, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915664672851562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919631958007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.1166122239993, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913375854492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.39741800060835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691558837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.88, "temperature": 65, "power": 296.763}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.430143517425, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691375732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69146728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.97680791987392, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913833618164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912765502929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.0481841833667, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 65, "power": 300.356}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.78146983004044, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.4172242705053, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.9, "temperature": 66, "power": 313.445}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.80002490787487, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.700042055461, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.50197133458778, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.77138619084587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 66, "power": 301.043}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.8614400998497, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.98738396766464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.13698101963806, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 66, "power": 241.979}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.7748856827395, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.84798729431978, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 66, "power": 293.904}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.67112619479394, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.42842448955557, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.5471380578383, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.88312865470508, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 1.0, "temperature": 67, "power": 305.609}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.78517894102703, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.3502542844026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.89708732525554, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 1.0, "temperature": 67, "power": 301.346}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.4751310137256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.99440949970534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.36284681091965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.77090543142646, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 69, "power": 305.243}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.90779385721717, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.3287078336257, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.3124112965127, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 69, "power": 312.901}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.7602409299628, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909866333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.2676407743443, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908340454101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919326782226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.62714061456694, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914596557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910934448242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914291381835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.8297819380589, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69134521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 1.0, "temperature": 69, "power": 158.467}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.1755367151923, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910018920898438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.20478097763092, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.39329969579188, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 71, "power": 303.672}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.49026333921654, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909942626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.18030682428667, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906585693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905288696289062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909103393554688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 71, "power": 291.419}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.35809460700295, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955337524414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7002067565917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.697906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913986206054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.60470811980053, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6951828002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938323974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6948165893554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.1595745353303, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923904418945312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693206787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.372055382704, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6921310424804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 71, "power": 277.364}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915359497070312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.8603438134575, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6916046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6903610229492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.14771190173656, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897354125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897735595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6895599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6880340576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.69854914011327, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6874923706054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6850967407226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6821975708007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.67584228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 72, "power": 292.662}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.10250017239036, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6980819702148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.832855224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7104606628417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.708587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.54284134103227, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [219, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919479370117188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [220, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6960067749023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [221, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6996231079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [222, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6961898803710938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.51832109810212, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [223, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [224, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [225, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6962814331054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [226, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.40611855930214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [227, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918563842773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [228, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 71, "power": 288.472}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [229, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693878173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [230, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6942672729492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.82284920772418, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [231, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923370361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [232, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [233, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [234, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6931381225585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.6352332104412, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [235, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926040649414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [236, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [237, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [238, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917572021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.47513016207745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [239, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69232177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [240, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [241, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [242, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 71, "power": 285.851}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.51076217084878, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [243, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [244, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [245, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [246, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913909912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.0999407217333, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [247, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [248, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [249, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914443969726562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [250, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.33361707169203, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [251, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913299560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [252, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [253, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 73, "power": 329.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [254, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.23512796562392, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [255, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [256, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [257, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [258, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.62843457496604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [259, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [260, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [261, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [262, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.1311407122093, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 72, "power": 315.966}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712628611.0267122, "return_code": 0}, "pipe": null} diff --git a/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/whisper.D1.data b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/whisper.D1.data new file mode 100644 index 000000000..1b54b817a --- /dev/null +++ b/paice-v1-11-g010135f/NVIDIA_A100_80GB_PCIe/zobisevu.2024-04-09_01:45:18.095020/whisper.D1.data @@ -0,0 +1,682 @@ +{"event": "config", "data": {"system": {"arch": "cuda", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}, "azure__a100_x2": {"location": "eastus2", "size": "Standard_NC48ads_A100_v4", "username": "ubuntu"}, "azure__a10_x2": {"location": "eastus2", "size": "Standard_NV72ads_A10_v5", "username": "ubuntu"}}, "self": {"aliaslist": [], "hostname": "surebadger.eastus2.cloudapp.azure.com", "ip": "surebadger.eastus2.cloudapp.azure.com", "ipaddrlist": ["::1", "60:45:bd:b9:f0:5b", "00:00:00:00:00:00", "10.0.1.4", "127.0.0.1", "fe80::6245:bdff:feb9:f05b%eth0"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-a100_x2-4db6f1500e0007c61a4cf0533daca445/id_rsa.covalent.surebadger.pem", "local": true, "main": true, "name": "manager", "user": "ubuntu"}}, "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "group": "hf", "install_group": "torch", "install_variant": "cuda", "run_name": "zobisevu.2024-04-09_01:45:18.095020", "enabled": true, "capabilities": {"nodes": 1}, "max_duration": 600, "voir": {"options": {"stop": 60, "interval": "1s"}}, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "config_base": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "hash": "9e157e2a825e38410d009e3113db32b8", "definition": "/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "argv": {"--precision": "tf32-fp16", "--num-workers": 8, "--model": "Whisper", "--batch-size": 64}, "plan": {"method": "per_gpu"}, "tags": ["audio", "huggingface"], "weight": 1.0, "name": "whisper", "tag": ["whisper", "D1"], "device": "1", "devices": ["1"], "env": {"CUDA_VISIBLE_DEVICES": "1"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 48, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "surebadger", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-69ef522d-2247-997c-74c7-19b5ed55c76e": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 63, "power": 89.635, "selection_variable": "CUDA_VISIBLE_DEVICES"}, "GPU-f7227278-e279-a0b8-6498-8dafbd9e5723": {"device": "1", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 65, "power": 97.731, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1712628522.760872, "milabench": {"tag": "paice-v1-11-g010135f", "commit": "010135f53e9664ae61b596149e569230d8b45f44", "date": "2024-04-03 00:41:57 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712628522.7866943}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 62, "power": 94.071}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 62, "power": 93.278}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 61, "power": 97.1}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [1357.25, 81920.0], "load": 0, "temperature": 60, "power": 92.07}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6992721557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4653310775756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6924057006835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7043609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6987991333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6994171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6975479125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6967544555664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.99, "temperature": 66, "power": 162.293}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6952743530273438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6956329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6933135986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918106079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918716430664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938247680664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926803588867188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 227.87081533495106, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926116943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917190551757812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.95, "temperature": 67, "power": 306.797}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.4525389276084, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915664672851562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919631958007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.1855828678801, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913375854492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.41269013087657, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691558837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.97980020405515, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691375732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.96, "temperature": 67, "power": 275.808}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69146728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.22874611609456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913833618164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912765502929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.91336190980763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.93432809642067, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 67, "power": 100.861}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 183.39824759678982, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.25414620435865, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.0070135113619, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.1336980023193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 68, "power": 295.428}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.84456912598472, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.78637659162342, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.6134581820821, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.96, "temperature": 68, "power": 288.323}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.32336757667804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.0210646989173, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.83480841060742, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.9, "temperature": 69, "power": 293.688}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.16361353005877, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.49032330688223, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.12730317291926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 69, "power": 291.033}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.03369365803627, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.81147048372614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.40515043858628, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.9, "temperature": 70, "power": 324.427}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.1186030975462, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.66775874551422, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 211.9516093115194, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.13839874661736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 70, "power": 308.229}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.5416646940489, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.33866235533674, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.93898966642934, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.99, "temperature": 71, "power": 289.391}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.158668422715, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.06491177390993, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909866333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.0605471662613, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908340454101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919326782226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.34142391830588, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914596557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 72, "power": 331.895}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910934448242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914291381835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.8998474217483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69134521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 211.56535640296636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910018920898438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 72, "power": 323.872}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.81350867264428, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.0281054550903, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.31372980156456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909942626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 73, "power": 331.944}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.04013483082193, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906585693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905288696289062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909103393554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.14908069588316, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955337524414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7002067565917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.697906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913986206054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.77423993874817, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6951828002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938323974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6948165893554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.9427363759491, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 1.0, "temperature": 74, "power": 312.535}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923904418945312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693206787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.85761168410528, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6921310424804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915359497070312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 211.66383063591593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6916046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6903610229492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.3877497971556, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897354125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897735595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6895599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.9, "temperature": 74, "power": 344.11}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6880340576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 211.9254551383717, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6874923706054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6850967407226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6821975708007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.67584228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.10471974900412, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6980819702148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.832855224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7104606628417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.708587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 211.3994134472237, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [219, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919479370117188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [220, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6960067749023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [221, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6996231079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [222, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6961898803710938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.11670091723994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [223, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.92, "temperature": 74, "power": 268.082}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [224, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [225, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6962814331054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [226, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.8117259887736, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [227, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918563842773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [228, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [229, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693878173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [230, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6942672729492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 204.16684740948892, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [231, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923370361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [232, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [233, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [234, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6931381225585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 202.15512796532917, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [235, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926040649414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [236, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [237, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.89, "temperature": 75, "power": 293.737}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [238, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917572021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.263793389667, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [239, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69232177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [240, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [241, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [242, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.49389126753155, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [243, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [244, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [245, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [246, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913909912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.7008568776156, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [247, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [248, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.9, "temperature": 74, "power": 307.52}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [249, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914443969726562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [250, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.62231341005912, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [251, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913299560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [252, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [253, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [254, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 211.66965465118525, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [255, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [256, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [257, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [258, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 212.11832082898178, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [259, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0.91, "temperature": 74, "power": 277.674}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [260, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [261, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [262, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.19117035456856, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"1": {"memory": [36737.25, 81920.0], "load": 0, "temperature": 72, "power": 104.455}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D1-0efae956f1553a76c1e03985181900f5.json", "-m", "bench", "--precision", "tf32-fp16", "--num-workers", "8", "--model", "Whisper", "--batch-size", "64"], "time": 1712628611.1413274, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/README.md b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/README.md new file mode 100644 index 000000000..48681a3e0 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/README.md @@ -0,0 +1,40 @@ +``` +================= +Benchmark results +================= + fail n perf sem% std% peak_memory score weight +bert-fp16 0 1 154.48 0.0% 0.3% 24616 154.483482 0.00 +bert-fp32 0 1 28.06 0.1% 0.5% 31580 28.060602 0.00 +bert-tf32 0 1 116.74 0.3% 1.5% 31582 116.735928 0.00 +bert-tf32-fp16 0 1 150.69 0.1% 0.3% 24616 150.688708 3.00 +bf16 0 1 271.78 0.1% 1.1% 1804 271.778801 0.00 +convnext_large-fp16 0 1 308.86 1.6% 8.6% 27478 308.861596 0.00 +convnext_large-fp32 0 1 43.02 2.2% 11.8% 49598 43.022425 0.00 +convnext_large-tf32 0 1 124.52 3.8% 20.4% 49598 124.522512 0.00 +convnext_large-tf32-fp16 0 1 309.11 1.6% 8.5% 27478 309.106568 3.00 +davit_large 0 1 291.05 0.7% 5.5% 34016 291.052985 1.00 +davit_large-multi 0 1 293.28 0.7% 5.2% 34260 293.275288 5.00 +dlrm 0 1 418797.28 0.1% 0.6% 7120 418797.282781 1.00 +focalnet 0 1 380.78 0.4% 3.4% 26112 380.781357 2.00 +fp16 0 1 252.23 0.1% 0.9% 1804 252.232285 0.00 +fp32 0 1 19.17 0.0% 0.2% 2182 19.167969 0.00 +llama 0 1 459.81 11.3% 70.7% 28442 459.812667 1.00 +opt-1_3b 1 1 NaN NaN NaN -1 NaN 5.00 +opt-6_7b 1 1 NaN NaN NaN 1534 NaN 5.00 +reformer 0 1 56.47 0.1% 0.5% 25420 56.474529 1.00 +regnet_y_128gf 0 1 78.29 1.0% 7.6% 31570 78.291422 2.00 +resnet152 0 1 643.39 0.8% 6.0% 35464 643.386597 1.00 +resnet152-multi 0 1 650.24 0.7% 5.4% 35478 650.239930 5.00 +resnet50 0 1 1001.67 1.7% 13.0% 4746 1001.666359 1.00 +rwkv 1 1 NaN NaN NaN -1 NaN 1.00 +stargan 0 1 37.97 2.9% 21.9% 37442 37.973348 1.00 +super-slomo 0 1 41.49 1.1% 8.6% 33816 41.492501 1.00 +t5 0 1 46.10 0.5% 3.6% 35460 46.097973 2.00 +tf32 0 1 133.58 0.0% 0.4% 2182 133.575008 0.00 +whisper 0 1 215.00 0.1% 0.4% 36740 215.000159 1.00 + +Scores +------ +Failure rate: 10.34% (FAIL) +Score: 66.37 +``` diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/badge.svg b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/badge.svg new file mode 100644 index 000000000..544903e36 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/badge.svg @@ -0,0 +1 @@ +NVIDIA_A100_80GB_PCIeNVIDIA_A100_80GB_PCIepartialpartial \ No newline at end of file diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-fp16.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-fp16.D0.data new file mode 100644 index 000000000..61aee7525 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-fp16.D0.data @@ -0,0 +1,447 @@ +{"event": "config", "data": {"argv": {"--batch-size": 32, "--model": "Bert", "--num-workers": 8, "--precision": "fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "hf", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "bert-fp16", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["bert-fp16", "D0"], "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 37, "power": 43.005, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994479.646666, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "fp16"], "time": 1711994479.6643915}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 43, "power": 277.073}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 46, "power": 321.336}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 146.84668068933155, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 48, "power": 238.605}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.46326235293657, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.95394297657907, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 155.03336537007394, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 48, "power": 309.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.77110795406855, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 155.0874034851953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.9945635608395, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 50, "power": 277.847}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.17321319453788, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 155.49249583302463, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.5392337793341, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 51, "power": 275.105}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.8082340285922, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 155.24040825549477, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.72765814331953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 52, "power": 315.202}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.37692010395014, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.6560838910894, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.21743662512276, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 53, "power": 323.949}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.82310310908244, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.04452839058226, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.64406816814912, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 53, "power": 284.697}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.08573835305367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.48284941398916, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 153.99994470631987, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 54, "power": 265.144}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.22686092451025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.95789359180523, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 56, "power": 311.421}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 153.65814704556666, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.407459405835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 153.59876826075757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 56, "power": 258.559}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.3402614303708, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.11234365079994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.37431334848145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358502388000488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 56, "power": 224.357}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.2218743666451, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 56, "power": 287.902}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "fp16"], "time": 1711994521.3294342, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-fp32.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-fp32.D0.data new file mode 100644 index 000000000..d6cc50e25 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-fp32.D0.data @@ -0,0 +1,228 @@ +{"event": "config", "data": {"argv": {"--batch-size": 32, "--model": "Bert", "--num-workers": 8, "--precision": "fp32"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "hf", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "bert-fp32", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["bert-fp32", "D0"], "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 52, "power": 73.571, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994524.034489, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "fp32"], "time": 1711994524.051153}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479292869567871}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29670.375, 81920.0], "load": 1.0, "temperature": 56, "power": 295.583}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323901176452637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044936180114746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.83090591430664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708606719970703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 58, "power": 291.577}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565232276916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491073608398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468774795532227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.4805908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 0.98, "temperature": 59, "power": 306.945}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 24.316720537156787, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515676498413086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.562061309814453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.30281424363468, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608406066894531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 60, "power": 301.516}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640921592712402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.290073935135755, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.648000717163086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.63291072845459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.2639593420825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614981651306152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 61, "power": 269.735}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622065544128418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.3188250940711, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669747352600098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752553939819336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.26951447300595, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848938941955566}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 0.98, "temperature": 62, "power": 298.378}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.93950080871582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.20751708460664, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007094383239746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.038527488708496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.17407807742796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 63, "power": 309.853}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.016993522644043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.91185188293457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.184179554869218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.875959396362305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.918696403503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.136860402445965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 64, "power": 296.367}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.017876625061035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.246811866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.15975054290745, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.431685447692871}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.560530662536621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.105373869215192, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 65, "power": 289.431}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.64004135131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.689648628234863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.069300218117935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.736746788024902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.803998947143555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.067033863489886, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 66, "power": 304.903}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.90011978149414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.020580291748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.097427860961403, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.152454376220703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.279325485229492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.073847707199143, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 67, "power": 299.1}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.385612487792969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.46435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.089556037224046, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.513260841369629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.550541877746582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.063955665952005, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 67, "power": 289.426}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.602174758911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.679553985595703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.059791149852078, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.78598690032959}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.926142692565918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.03958237262441, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.089705467224121}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 68, "power": 303.243}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.263202667236328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.010311297597074, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.435498237609863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.600459098815918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.987218120287988, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 69, "power": 302.195}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.752622604370117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.883284568786621}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.99490383395129, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.991811752319336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.081356048583984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.963276859646065, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 70, "power": 294.78}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.160579681396484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.241079330444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.945232963875778, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.328137397766113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.415624618530273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.88006665665659, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 70, "power": 291.204}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.503317832946777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.589831352233887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.906453757773008, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.672152519226074}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.75645637512207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.86729102414908, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.840280532836914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 71, "power": 295.685}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.907486915588379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.873655543241785, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.961742401123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.999088287353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.834783668433186, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 72, "power": 290.411}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.025399208068848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.040262222290039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.83966038871566, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31580.375, 81920.0], "load": 1.0, "temperature": 72, "power": 254.868}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-fp32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "fp32"], "time": 1711994585.3361418, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-tf32-fp16.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-tf32-fp16.D0.data new file mode 100644 index 000000000..98854c0cd --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-tf32-fp16.D0.data @@ -0,0 +1,445 @@ +{"event": "config", "data": {"argv": {"--batch-size": 32, "--model": "Bert", "--num-workers": 8, "--precision": "tf32-fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "hf", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "bert-tf32-fp16", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["bert-tf32-fp16", "D0"], "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 3.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.01, "memory": 0.010771942138671876}, "temperature": 70, "power": 89.334, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994633.138256, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711994633.1550796}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.47928237915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.324040412902832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.062586784362793}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.97, "temperature": 71, "power": 278.036}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.039785385131836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.756427764892578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.622476577758789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.533597946166992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.476072311401367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.44356918334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.426374435424805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.416016578674316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.406279563903809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.395854949951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3849458694458}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.377933502197266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374731063842773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.374163627624512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.373811721801758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 308.814}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.372345924377441}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.369985580444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.367439270019531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366805076599121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.366157531738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36543083190918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.365005493164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.364080429077148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.363504409790039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362876892089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.362106323242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36214828491211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361862182617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 153.05796083138304, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36141586303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361193656921387}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 258.593}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.361042022705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360857963562012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 154.81843969864107, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360292434692383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36062240600586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.360210418701172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359718322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359888076782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.8241425649145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.36003303527832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359695434570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35957145690918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359235763549805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359640121459961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.0969445050283, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35922622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358892440795898}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 236.448}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359354019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359068870544434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.5724339040445, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358457565307617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35875129699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358539581298828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358678817749023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358731269836426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.66977721031645, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359058380126953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358646392822266}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.359306335449219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358448028564453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358570098876953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.4670581181288, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358207702636719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 72, "power": 332.579}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358234405517578}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358455657958984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358585357666016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.70464239669812, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358243942260742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358636856079102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358400344848633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35833740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.81823018524548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358665466308594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358572959899902}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358033180236816}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35838508605957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.733155185774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358282089233398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 314.33}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358419418334961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35842227935791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358055114746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3580322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.41061232303164, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358024597167969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357969284057617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358566284179688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358064651489258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.46132014389903, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358379364013672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358316421508789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358358383178711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358470916748047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358149528503418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.4196018251181, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358348846435547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 271.91}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35792350769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358451843261719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358392715454102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358460426330566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.1110723894388, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358633995056152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358476638793945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357904434204102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358171463012695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.5465547978427, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358545303344727}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357903480529785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358219146728516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358108520507812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358248710632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.67412592738864, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 73, "power": 252.356}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357889175415039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35851001739502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358098983764648}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.58731447926385, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358307838439941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358318328857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358499526977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358034133911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357908248901367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.1167839619326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358236312866211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358089447021484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35861587524414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358320236206055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358344078063965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 266.218}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.54084588594577, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357931137084961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357934951782227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358261108398438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358291625976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358189582824707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.6453244133927, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358278274536133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357937812805176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358482360839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357876777648926}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.5857988513242, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358512878417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358345031738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358097076416016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358293533325195}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357551574707031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 269.045}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.59718551517793, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35798454284668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35827922821045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358026504516602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358270645141602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35817813873291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.83042352772858, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358072280883789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358430862426758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357963562011719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358388900756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 151.13578623647598, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35820198059082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358226776123047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35807991027832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 321.427}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358162879943848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.7241087159921, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358267784118652}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358076095581055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358078002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358617782592773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358043670654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.62632667196814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357868194580078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358116149902344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35775375366211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358063697814941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357721328735352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.3268707006744, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357690811157227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357881546020508}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358210563659668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 74, "power": 247.625}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.09597924917082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35821533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358190536499023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357914924621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357951164245605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357860565185547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.62171343303618, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357804298400879}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357797622680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358224868774414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35790729522705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.3578519821167}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.4619670449285, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357871055603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358051300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358205795288086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 246.403}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.358312606811523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.357698440551758}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.604452806832, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24616.375, 81920.0], "load": 0.98, "temperature": 75, "power": 258.865}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711994675.0286233, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-tf32.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-tf32.D0.data new file mode 100644 index 000000000..11ddcb3d3 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bert-tf32.D0.data @@ -0,0 +1,375 @@ +{"event": "config", "data": {"argv": {"--batch-size": 32, "--model": "Bert", "--num-workers": 8, "--precision": "tf32"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "hf", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "bert-tf32", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["bert-tf32", "D0"], "tags": ["huggingface", "language-modeling", "nlp", "precision-showcase", "transformer"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 85.998, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994588.020471, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "tf32"], "time": 1711994588.0373726}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.479286193847656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.323932647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.044816970825195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 68, "power": 321.126}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.830974578857422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.708649635314941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.565240859985352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.491065979003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.468748092651367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.480533599853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.515584945678711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.561928749084473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.608244895935059}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.640739440917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.647823333740234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.632768630981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.99, "temperature": 69, "power": 313.287}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.614849090576172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.621868133544922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.669413566589355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.752106666564941}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.848526000976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.939350128173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.007367134094238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.03938102722168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.01879596710205}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.915424346923828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 106.08152468933368, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.861414909362793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.006791114807129}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 69, "power": 291.335}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.027995109558105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.285082817077637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 109.36398966819935, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.481186866760254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.615607261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.69710636138916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.744179725646973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.62872020332875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.784246444702148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.841986656188965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.928215026855469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.041173934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.68313335498243, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.170112609863281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 70, "power": 296.138}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.299582481384277}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.414661407470703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.506692886352539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.50056251328866, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.572625160217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.624845504760742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.687198638916016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.77263069152832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.91507919531979, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.880777359008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.014769554138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.169602394104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.335892677307129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.31122793873574, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.508112907409668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 71, "power": 320.607}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.685002326965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.864999771118164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.042545318603516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.80319840929606, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.21193790435791}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.368173599243164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.505072593688965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.626041412353516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.75970767330169, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.737335205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.839637756347656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.941758155822754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.044066429138184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 71, "power": 319.891}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.64099231361924, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.145570755004883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.23983097076416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.333688735961914}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.425797462463379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.1426748192086, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.513972282409668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.597168922424316}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.675834655761719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.757519721984863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.19283237190045, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.833535194396973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.900604248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.956380844116211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.990116119384766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 71, "power": 270.011}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.20094943565344, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.019303321838379}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.04137897491455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.049532890319824}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.041813850402832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.31888489330693, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.022936820983887}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.001933097839355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.978167533874512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.95536994934082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.41279473527791, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.924501419067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.874979019165039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.806092262268066}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 72, "power": 285.093}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.719812393188477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.1842526305669, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.609824180603027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.481300354003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.328900337219238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.169635772705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.13596890026825, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.010271072387695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.85876178741455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.748523712158203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.666166305541992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 115.89375671640538, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.625612258911133}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.650134086608887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 73, "power": 320.332}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.727069854736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.84864330291748}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.11097476588397, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 13.988987922668457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.1289644241333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.244298934936523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.312968254089355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.03804205675542, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.326717376708984}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.288104057312012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.236703872680664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.258355140686035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.02342309508725, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 14.536263465881348}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.127537727355957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 73, "power": 283.88}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.74303913116455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.15192413330078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.1808633754672, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.16707420349121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.868474960327148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.518776893615723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.283734321594238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.08031165452468, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.241960525512695}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.301491737365723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.411982536315918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.539347648620605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.52230902901606, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.672064781188965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 74, "power": 281.631}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.800228118896484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 15.920104026794434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.03019905090332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 116.97846699768348, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.13632583618164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.23299217224121}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.31147003173828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.37289810180664}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 117.90484421936218, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.41785430908203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.443866729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45807647705078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.459491729736328}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.63727695928938, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.45636749267578}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 74, "power": 324.361}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.44569206237793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.440540313720703}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.448402404785156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.94185095801245, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.4744815826416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.53107261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.61451530456543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.71721076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.87827389489294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.82810401916504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 16.940759658813477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.044357299804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.134565353393555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 119.50491619090819, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 0.98, "temperature": 74, "power": 310.579}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.20937156677246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.272945404052734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.317352294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.34956169128418}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.95655908502496, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31582.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.612}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-bert-tf32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "-m", "bench", "--batch-size", "32", "--model", "Bert", "--num-workers", "8", "--precision", "tf32"], "time": 1711994630.4929757, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bf16.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bf16.D0.data new file mode 100644 index 000000000..b7775a950 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/bf16.D0.data @@ -0,0 +1,110 @@ +{"event": "config", "data": {"argv": {"--dtype": "bf16", "--m": 8192, "--n": 8192, "--number": 10, "--repeat": 90}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "flops", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "bf16", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["bf16", "D0"], "tags": ["diagnostic", "flops"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 90.613, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994677.788216, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "bf16", "--m", "8192", "--n", "8192", "--number", "10", "--repeat", "90"], "time": 1711994677.798339}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 180.84298561931325, "units": "Tflops", "t": 1711994679.5962987}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 68, "power": 54.757}}, "t": 1711994679.1082988}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.80406722642476, "units": "Tflops", "t": 1711994679.6778724}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0.07, "temperature": 72, "power": 348.573}}, "t": 1711994679.6201472}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.6677954161773, "units": "Tflops", "t": 1711994679.7580817}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1795319502964, "units": "Tflops", "t": 1711994679.8383505}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.25779837987693, "units": "Tflops", "t": 1711994679.9185843}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.2080614112918, "units": "Tflops", "t": 1711994679.9988327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1428599367139, "units": "Tflops", "t": 1711994680.0791097}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1730117641659, "units": "Tflops", "t": 1711994680.1593766}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 304.823}}, "t": 1711994680.158327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.0996810329595, "units": "Tflops", "t": 1711994680.239726}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1485638278422, "units": "Tflops", "t": 1711994680.3200035}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.1733998203631, "units": "Tflops", "t": 1711994680.4005527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 259.69844426140486, "units": "Tflops", "t": 1711994680.485287}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.01441225775636, "units": "Tflops", "t": 1711994680.567701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.1537156530753, "units": "Tflops", "t": 1711994680.6491585}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.39323956763985, "units": "Tflops", "t": 1711994680.7293584}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 285.453}}, "t": 1711994680.667082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.41854762648614, "units": "Tflops", "t": 1711994680.8095968}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.23088925523217, "units": "Tflops", "t": 1711994680.8898365}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1648619675812, "units": "Tflops", "t": 1711994680.970116}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1273791428709, "units": "Tflops", "t": 1711994681.0504043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.05733004272116, "units": "Tflops", "t": 1711994681.1307073}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.0939791757238, "units": "Tflops", "t": 1711994681.2109964}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 305.015}}, "t": 1711994681.1741226}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1355267111536, "units": "Tflops", "t": 1711994681.291317}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.4587037640649, "units": "Tflops", "t": 1711994681.3726757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.5264115585658, "units": "Tflops", "t": 1711994681.4546206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.0028602726347, "units": "Tflops", "t": 1711994681.5367243}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.96782182455905, "units": "Tflops", "t": 1711994681.6188376}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.2463275715957, "units": "Tflops", "t": 1711994681.700257}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 286.041}}, "t": 1711994681.6821117}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.84089297848305, "units": "Tflops", "t": 1711994681.7806795}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.8773498891289, "units": "Tflops", "t": 1711994681.860739}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.0850195936222, "units": "Tflops", "t": 1711994681.9410188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1811620452851, "units": "Tflops", "t": 1711994682.0212708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.22518193792575, "units": "Tflops", "t": 1711994682.1015158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.23415068607136, "units": "Tflops", "t": 1711994682.1817539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.02461168469716, "units": "Tflops", "t": 1711994682.2623463}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 306.08}}, "t": 1711994682.191358}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.43887843656614, "units": "Tflops", "t": 1711994682.3437662}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.42950588189274, "units": "Tflops", "t": 1711994682.4257464}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.2210833349263, "units": "Tflops", "t": 1711994682.5077903}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.9125575672086, "units": "Tflops", "t": 1711994682.5899181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.0786997005267, "units": "Tflops", "t": 1711994682.6716897}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.06062305814055, "units": "Tflops", "t": 1711994682.7525678}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 285.473}}, "t": 1711994682.6995738}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.755343761127, "units": "Tflops", "t": 1711994682.8327043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.43569444797527, "units": "Tflops", "t": 1711994682.9128857}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1721967627041, "units": "Tflops", "t": 1711994682.993153}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1868675303896, "units": "Tflops", "t": 1711994683.073405}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1876826190742, "units": "Tflops", "t": 1711994683.153668}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.61343350849984, "units": "Tflops", "t": 1711994683.234086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 305.299}}, "t": 1711994683.2066154}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.70954670834743, "units": "Tflops", "t": 1711994683.3154185}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.0889050701148, "units": "Tflops", "t": 1711994683.3971903}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.5076501609809, "units": "Tflops", "t": 1711994683.4791362}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.04258159246194, "units": "Tflops", "t": 1711994683.5612245}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.00519649497096, "units": "Tflops", "t": 1711994683.6433456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.7151086237548, "units": "Tflops", "t": 1711994683.7246244}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 74, "power": 298.017}}, "t": 1711994683.717353}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.3916069499456, "units": "Tflops", "t": 1711994683.804866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.7618916742067, "units": "Tflops", "t": 1711994683.8849492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1387858740782, "units": "Tflops", "t": 1711994683.9652174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.08990656579095, "units": "Tflops", "t": 1711994684.0454962}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.13145286647296, "units": "Tflops", "t": 1711994684.125776}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.31506081717924, "units": "Tflops", "t": 1711994684.2062912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 270.6293216452113, "units": "Tflops", "t": 1711994684.287601}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 299.572}}, "t": 1711994684.2270813}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.95366914535515, "units": "Tflops", "t": 1711994684.369184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.9966305455246, "units": "Tflops", "t": 1711994684.4512892}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.95380901049566, "units": "Tflops", "t": 1711994684.5334063}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.9779431017937, "units": "Tflops", "t": 1711994684.615515}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.03968418244045, "units": "Tflops", "t": 1711994684.6967006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.8732539667643, "units": "Tflops", "t": 1711994684.7767513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 308.397}}, "t": 1711994684.7341104}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.11027082579784, "units": "Tflops", "t": 1711994684.857079}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.10131017859925, "units": "Tflops", "t": 1711994684.9373553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.15508285120416, "units": "Tflops", "t": 1711994685.0176146}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.05081566732457, "units": "Tflops", "t": 1711994685.097908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.14491100558774, "units": "Tflops", "t": 1711994685.17876}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.4840520768818, "units": "Tflops", "t": 1711994685.2598097}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 302.678}}, "t": 1711994685.2439747}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.53579324057705, "units": "Tflops", "t": 1711994685.3417997}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.22732350158134, "units": "Tflops", "t": 1711994685.4238436}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.88065478552045, "units": "Tflops", "t": 1711994685.5059907}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.985729219952, "units": "Tflops", "t": 1711994685.588097}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 269.54028437076516, "units": "Tflops", "t": 1711994685.6697395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 272.82271341178136, "units": "Tflops", "t": 1711994685.750409}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.516558334418, "units": "Tflops", "t": 1711994685.830563}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 287.875}}, "t": 1711994685.7509916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.5631379504233, "units": "Tflops", "t": 1711994685.910773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.97673643849885, "units": "Tflops", "t": 1711994685.9910944}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.9751087733485, "units": "Tflops", "t": 1711994686.071417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.0320885630393, "units": "Tflops", "t": 1711994686.1517217}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.8617500922511, "units": "Tflops", "t": 1711994686.2326663}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 268.7094726787059, "units": "Tflops", "t": 1711994686.3145616}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 306.474}}, "t": 1711994686.2592137}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.9436895561888, "units": "Tflops", "t": 1711994686.3967319}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.89932866243885, "units": "Tflops", "t": 1711994686.4788768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 267.8596497855228, "units": "Tflops", "t": 1711994686.5610306}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.1887082333833, "units": "Tflops", "t": 1711994686.64217}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 271.21502360809865, "units": "Tflops", "t": 1711994686.7233}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 273.80349867912213, "units": "Tflops", "t": 1711994686.8036625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 75, "power": 303.361}}, "t": 1711994686.7676277}, "pipe": "data"} +{"event": "end", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "bf16", "--m", "8192", "--n", "8192", "--number", "10", "--repeat", "90"], "time": 1711994687.7149699, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp16.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp16.D0.data new file mode 100644 index 000000000..7725512f0 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp16.D0.data @@ -0,0 +1,295 @@ +{"event": "config", "data": {"argv": {"--batch-size": 128, "--epochs": 50, "--lr": 0.01, "--model": "convnext_large", "--no-stdout": true, "--precision": "fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "torchvision", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "convnext_large-fp16", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["convnext_large-fp16", "D0"], "tags": ["classification", "convnet", "precision-showcase", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 84.436, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994690.437892, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "fp16"], "time": 1711994690.4550748}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 53.156}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 64, "power": 52.374}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [10736.375, 81920.0], "load": 0.06, "temperature": 64, "power": 101.696}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 69, "power": 140.01}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 1.0, "temperature": 69, "power": 263.721}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 331.2466710949573, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 318.4342112183079, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 321.4397156287124, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 71, "power": 343.837}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 346.9193550878685, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.67423573311373, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.7981911123931, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.24, "temperature": 67, "power": 81.938}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 288.26636121662756, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 223.14655348698267, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.72495758132203, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 71, "power": 317.05}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 312.42538403132943, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.5451288621834, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 72, "power": 283.283}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.75771815031874, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.2078760112023, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.95202040605636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.94, "temperature": 70, "power": 218.432}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.2876859030998, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.5071878300177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.54617998396475, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 73, "power": 354.935}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 345.84510499106256, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 200.96912225072367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 330.02454619441926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 73, "power": 117.227}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 330.0102470244265, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 308.59256096245036, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 73, "power": 274.725}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.7787685280683, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.5892917566981, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.4013184116795, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 74, "power": 300.649}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.4172137175468, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 316.68784548247135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.0010858062818, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 74, "power": 295.686}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.086504406165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 274.1885373635467, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 219.40316993254257, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.88, "temperature": 74, "power": 304.142}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.88, "temperature": 74, "power": 304.142}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "fp16"], "time": 1711994741.3522916, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp32.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp32.D0.data new file mode 100644 index 000000000..a85d4c682 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp32.D0.data @@ -0,0 +1,171 @@ +{"event": "config", "data": {"argv": {"--batch-size": 128, "--epochs": 50, "--lr": 0.01, "--model": "convnext_large", "--no-stdout": true, "--precision": "fp32"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "torchvision", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "convnext_large-fp32", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["convnext_large-fp32", "D0"], "tags": ["classification", "convnet", "precision-showcase", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 87.569, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994743.976525, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "fp32"], "time": 1711994743.9917283}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 52.96}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 64, "power": 52.374}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214483737945557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2132.375, 81920.0], "load": 0, "temperature": 63, "power": 78.075}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33617639541626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 70, "power": 288.458}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254825592041016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179396152496338}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 306.187}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268494129180908}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 63.86393036042333, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 298.58}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018351554870605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.04951540360266, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164165019989014}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 312.271}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.22237458048806, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133245468139648}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 72, "power": 302.367}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.23430391898081, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162621021270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.15382888921861, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066978931427002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 311.214}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.15006887522126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074810981750488}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 72, "power": 307.453}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.24498343792025, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0772786140441895}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.22390349302507, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043660640716553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 325.351}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.19446770348548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142033576965332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 72, "power": 287.787}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.2012401751643, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11406946182251}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.2361166213753, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.118796348571777}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 291.854}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.278667694884746, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084875583648682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 73, "power": 291.771}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.23164165115955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017924785614014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.21061246001272, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0746049880981445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 274.817}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.15435449675367, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962323188781738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 299.765}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.169448443078196, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078864097595215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.20187155732401, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047895908355713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.603}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.141222831689035, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.076254367828369}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 293.041}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.12072067261808, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083705902099609}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.1190951518724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153024196624756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 287.398}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.081153328636965, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962716102600098}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.842}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.09329939757067, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094931125640869}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.06842956318212, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002690315246582}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 322.859}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.06075515857467, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089021682739258}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.282}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.05890689602178, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033406734466553}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.026192563164614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93844747543335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 294.796}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.97750401685032, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112069606781006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 267.599}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.945217537768386, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875514507293701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 55.93648718482034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840810298919678}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 286.324}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 65.5280009287775, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938738822937012}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.9, "temperature": 77, "power": 297.041}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 31.881101972710656, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 263.968}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-fp32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "fp32"], "time": 1711994827.3025699, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32-fp16.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32-fp16.D0.data new file mode 100644 index 000000000..e64b5b4cb --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32-fp16.D0.data @@ -0,0 +1,295 @@ +{"event": "config", "data": {"argv": {"--batch-size": 128, "--epochs": 50, "--lr": 0.01, "--model": "convnext_large", "--no-stdout": true, "--precision": "tf32-fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "torchvision", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "convnext_large-tf32-fp16", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["convnext_large-tf32-fp16", "D0"], "tags": ["classification", "convnet", "precision-showcase", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 3.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.02, "memory": 0.010771942138671876}, "temperature": 72, "power": 90.321, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994889.088552, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "tf32-fp16"], "time": 1711994889.103453}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 54.341}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 66, "power": 53.657}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.214508056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2206.375, 81920.0], "load": 0, "temperature": 65, "power": 81.146}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.33624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.254852294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268218994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.201690673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.164154052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13348388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.88, "temperature": 71, "power": 293.148}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0748291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077056884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1138916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11871337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 72, "power": 272.151}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0850830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01800537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 326.32959709796177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9622802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078765869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 326.36146570215897, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048004150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 346.51491547668087, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 73, "power": 306.877}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962799072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.05488719293623, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002777099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.088836669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.3596915896534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033294677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938385009765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.37091088433556, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 73, "power": 304.428}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 282.54021755817485, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84075927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 220.61501464665668, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 73, "power": 130.251}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.1359299993161, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944305419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954071044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 308.8674272303455, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94793701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.856536865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.7392801139958, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 73, "power": 261.964}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.43145998183763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878204345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996490478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023529052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.8246234584011, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.955780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 317.2572101602385, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0440673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.91, "temperature": 74, "power": 294.6}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.9138094803945, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907257080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994720458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921539306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.62336899829285, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981842041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 75, "power": 319.06}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.7460609846034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.791717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 346.16278512127116, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962738037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 197.54004935255716, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83221435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86846923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.88, "temperature": 74, "power": 345.598}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85845947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 323.8684630503278, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921295166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.807586669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933868408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 344.25073005628843, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.926849365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 306.00546690002756, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 74, "power": 285.451}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88641357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 311.912192740863, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898773193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.67518710065286, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9808349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92547607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 315.2123641721501, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915435791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 75, "power": 277.253}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936798095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.8841586813814, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848236083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911590576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.5340055106325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99957275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 313.3564773585718, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.957916259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.9, "temperature": 75, "power": 259.735}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.973114013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 314.210454522835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 294.6907854252605, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.794891357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.780670166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.772857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.89, "temperature": 74, "power": 261.343}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 223.15644290842968, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27478.375, 81920.0], "load": 0.99, "temperature": 74, "power": 310.639}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32-fp16.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "tf32-fp16"], "time": 1711994940.3835528, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32.D0.data new file mode 100644 index 000000000..12abf66d5 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32.D0.data @@ -0,0 +1,198 @@ +{"event": "config", "data": {"argv": {"--batch-size": 128, "--epochs": 50, "--lr": 0.01, "--model": "convnext_large", "--no-stdout": true, "--precision": "tf32"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "torchvision", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "convnext_large-tf32", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["convnext_large-tf32", "D0"], "tags": ["classification", "convnet", "precision-showcase", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 72, "power": 90.419, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994829.997748, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "tf32"], "time": 1711994830.0146127}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 67, "power": 54.145}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 53.266}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2144575119018555}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2504.375, 81920.0], "load": 0, "temperature": 65, "power": 81.049}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.336220741271973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2547926902771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.179419040679932}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 71, "power": 248.588}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.268465518951416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2018303871154785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1641645431518555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.133294105529785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 72, "power": 197.839}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.162642955780029}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 118.03024836419114, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066996097564697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074804782867432}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 136.3329534850501, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077259063720703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 73, "power": 292.96}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 153.66995535818165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0435991287231445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.6318675822153, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.142058372497559}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.114046573638916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.6876039298953, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 73, "power": 301.564}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1187896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.29574629298628, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084855556488037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017906188964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.44070464366897, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.074584484100342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.886}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.15639829852064, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962344169616699}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078824996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.31512623810434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047876834869385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.2521155661082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0762128829956055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 74, "power": 276.882}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083765983581543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.52728874290585, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.153017044067383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.3510135885975, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962660312652588}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.094902038574219}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 306.274}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.76212184230977, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.002725601196289}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.39191452934276, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089033126831055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033377170562744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.97, "temperature": 75, "power": 292.074}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.69937918890375, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938430309295654}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.03782972940924, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.112060070037842}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875522613525391}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 152.43661206923926, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840785503387451}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.99, "temperature": 75, "power": 314.266}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 135.58032742541033, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938703536987305}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.46746716413169, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9308247566223145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.798532009124756}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 75, "power": 321.201}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.47275188271684, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888091087341309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.04215422932012, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.874434947967529}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944365501403809}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.4871092845059, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958274841308594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 275.795}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.25096274141401, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953979015350342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947924613952637}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.30061455655039, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85654878616333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 0.96, "temperature": 76, "power": 238.754}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.94345055864633, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895290374755859}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912010192871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.80690223855882, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968919277191162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.88339573625653, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.984976768493652}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 76, "power": 272.07}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.878252029418945}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 149.84488373714584, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996346950531006}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 97.21847713778315, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023502349853516}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968320369720459}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 312.972}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 150.41897360113876, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 32]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947745323181152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 96.96985043183147, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 30]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [49598.375, 81920.0], "load": 1.0, "temperature": 77, "power": 277.152}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-convnext_large-tf32.D0-b73ff48ec1c8cba5c27988b8c85e9d62.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "128", "--epochs", "50", "--lr", "0.01", "--model", "convnext_large", "--no-stdout", "--precision", "tf32"], "time": 1711994886.4849188, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0.data new file mode 100644 index 000000000..e40fdc539 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0.data @@ -0,0 +1,266 @@ +{"event": "config", "data": {"argv": {"--amp": true, "--batch-size": 128, "--lr-base": 0.01, "--model": "davit_large"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "timm", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "davit_large-multi", "plan": {"method": "njobs", "n": 1}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["davit_large-multi", "0"], "tags": ["classification", "multigpu", "transformer", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 5.0, "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.82, "memory": 0.010771942138671876}, "temperature": 73, "power": 91.126, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711995054.770036, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "128", "--lr-base", "0.01", "--model", "davit_large", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1711995054.7870913}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2154.375, 81920.0], "load": 0, "temperature": 67, "power": 83.425}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.224214553833008}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.274s, 39.10/s (3.274s, 39.10/s) LR: 1.000e-05 Data: 0.751 (0.751)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.176412582397461}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 72, "power": 277.035}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255987167358398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.163339138031006}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 72, "power": 294.62}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.234711647033691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.243332386016846}, "pipe": "data"} +{"event": "data", "data": {"rate": 293.0370469125664, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048550128936768}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.98, "temperature": 72, "power": 260.575}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 272.4927501387861, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.24672794342041}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.31455703279596, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.151210784912109}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.001905866473, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 74, "power": 324.442}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.270848274230957}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.3968608843902, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.2208214148233, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.312984943389893}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.9067262387922, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 75, "power": 301.283}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311421871185303}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.414s, 308.99/s (0.511s, 250.43/s) LR: 1.000e-05 Data: 0.000 (0.031)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.747 (0.747) Loss: 7.1176 (7.1176) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.338 (0.180) Loss: 7.0505 (7.2336) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0/20240401-181100-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.0680994481035, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.97, "temperature": 75, "power": 302.687}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.96, "temperature": 75, "power": 332.393}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413616180419922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25322.375, 81920.0], "load": 0, "temperature": 71, "power": 85.29}}}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.943s, 135.69/s (0.943s, 135.69/s) LR: 1.008e-03 Data: 0.530 (0.530)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 287.9017215444442, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087185859680176}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.95213529907113, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0225982666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 74, "power": 256.518}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.9467519700851, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015655040740967}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.8133358016329, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.004358291625977}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.03989011477375, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.99, "temperature": 75, "power": 330.636}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06734561920166}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.2871343398134, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.236584752351, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943554401397705}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.71152092626573, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032367706298828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.99, "temperature": 75, "power": 297.704}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.2592186489669, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978535175323486}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.32856320006437, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981788635253906}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.7649715346783, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 76, "power": 313.92}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017374038696289}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.94578217543506, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.413s, 309.88/s (0.437s, 292.70/s) LR: 1.008e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.586 (0.586) Loss: 6.8701 (6.8701) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.7062 (6.8681) Acc@1: 0.0000 ( 0.2665) Acc@5: 3.1250 ( 1.3081)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0/20240401-181100-davit_large-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.07815843148876, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 76, "power": 315.356}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 76, "power": 327.027}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.02, "temperature": 71, "power": 90.309}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885659694671631}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.934s, 137.09/s (0.934s, 137.09/s) LR: 2.006e-03 Data: 0.521 (0.521)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.915807247161865}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.12496552985056, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 75, "power": 313.931}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.08311096239674, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9318437576293945}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.0254879407564, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9413652420043945}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.4690941953042, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995604515075684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.95, "temperature": 76, "power": 301.429}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.54867659921706, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912203788757324}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.11802023725113, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013670921325684}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.65998425379854, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.95, "temperature": 75, "power": 282.757}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.1924175751599, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896271228790283}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.92601272594055, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.970767974853516}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.6036098603586, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.092811584472656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 75, "power": 259.866}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.1957425127653, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.033717155456543}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.4176323650478, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.948 (6.96) Time: 0.414s, 309.00/s (0.438s, 292.23/s) LR: 2.006e-03 Data: 0.001 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.608 (0.608) Loss: 6.7530 (6.7530) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.4966 (6.8226) Acc@1: 3.1250 ( 0.2422) Acc@5: 6.2500 ( 1.0417)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 308.6463216013651, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.96, "temperature": 76, "power": 300.559}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.93, "temperature": 77, "power": 318.258}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.03, "temperature": 71, "power": 86.484}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86574649810791}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.866 (6.87) Time: 0.976s, 131.19/s (0.976s, 131.19/s) LR: 3.004e-03 Data: 0.564 (0.564)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 283.8672338576469, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.860692024230957}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.98, "temperature": 74, "power": 335.831}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.5004318898384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.905224323272705}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.67869505331134, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900017738342285}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.5629655708914, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 1.0, "temperature": 76, "power": 336.372}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.47949594470134, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.911055088043213}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.4081303057337, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976555824279785}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.810962204735, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897703170776367}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 76, "power": 309.182}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.1522515852994, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.003688812255859}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.4008188897043, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.037755966186523}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.2432964795452, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.96, "temperature": 77, "power": 317.225}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.24222767903035, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975908279418945}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.4589098942115, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015989303588867}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 6.998 (6.96) Time: 0.414s, 309.44/s (0.438s, 291.96/s) LR: 3.004e-03 Data: 0.000 (0.025)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 278.7591661186109, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.581 (0.581) Loss: 6.8181 (6.8181) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.3763 (6.8167) Acc@1: 0.0000 ( 0.1938) Acc@5: 9.3750 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.82, "temperature": 73, "power": 88.64}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.42760073553944, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.93, "temperature": 76, "power": 322.627}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.03, "temperature": 72, "power": 88.348}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8576154708862305}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.858 (6.86) Time: 0.972s, 131.63/s (0.972s, 131.63/s) LR: 4.002e-03 Data: 0.559 (0.559)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 325.003}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.1487850615177, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.830388069152832}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.49248340630027, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881205081939697}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.55946640901755, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.920263290405273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.97, "temperature": 76, "power": 311.211}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.9230804969462, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918469429016113}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.6206519197784, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993577003479004}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.1715048053166, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.96, "temperature": 76, "power": 306.864}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.63772925921836, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.038257598876953}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.45594009856416, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0744194984436035}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.3862511720553, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.99, "temperature": 77, "power": 299.436}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013601303100586}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.75381046726187, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049444675445557}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.6872067326834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.013031482696533}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.5670938752776, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.093 (6.97) Time: 0.414s, 309.52/s (0.438s, 292.03/s) LR: 4.002e-03 Data: 0.000 (0.025)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.99, "temperature": 77, "power": 248.207}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.581 (0.581) Loss: 6.8217 (6.8217) Acc@1: 0.7812 ( 0.7812) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.7427 (6.8290) Acc@1: 3.1250 ( 0.3634) Acc@5: 3.1250 ( 1.1628)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0/20240401-181100-davit_large-224/checkpoint-4.pth.tar', 0.3633720930232558)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.0505141254495, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.97, "temperature": 77, "power": 301.61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0.61, "temperature": 74, "power": 92.662}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34260.375, 81920.0], "load": 0, "temperature": 71, "power": 86.191}}}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large-multi.0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "128", "--lr-base", "0.01", "--model", "davit_large", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0", "--checkpoint-hist", "1"], "time": 1711995175.5219054, "return_code": -15}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0.data new file mode 100644 index 000000000..9825d6f07 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0.data @@ -0,0 +1,255 @@ +{"event": "config", "data": {"argv": {"--amp": true, "--batch-size": 128, "--lr-base": 0.01, "--model": "davit_large"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "timm", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "davit_large", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["davit_large", "D0"], "tags": ["classification", "transformer", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 69, "power": 89.42, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711994943.040454, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "128", "--lr-base", "0.01", "--model", "davit_large", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0", "--checkpoint-hist", "1"], "time": 1711994943.0572884}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model davit_large created, param count:196811752\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.005) calculated from base learning rate (0.01) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2154.375, 81920.0], "load": 0, "temperature": 64, "power": 79.899}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2242937088012695}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.224 (7.22) Time: 3.285s, 38.96/s (3.285s, 38.96/s) LR: 1.000e-05 Data: 0.743 (0.743)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.176398277282715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 1.0, "temperature": 70, "power": 309.009}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.255929470062256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.163326740264893}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 69, "power": 277.562}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.234625339508057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.24330997467041}, "pipe": "data"} +{"event": "data", "data": {"rate": 296.75802682722394, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0485382080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 70, "power": 338.096}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.6078890393026, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.246696472167969}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.16402335096836, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.151278018951416}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.6342042016642, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.99, "temperature": 71, "power": 283.802}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.270854949951172}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.32815594956776, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.0020461113209, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.3129425048828125}, "pipe": "data"} +{"event": "data", "data": {"rate": 309.00257069860527, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32478.375, 81920.0], "load": 0.96, "temperature": 72, "power": 307.321}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311290740966797}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.311 (7.24) Time: 0.413s, 309.82/s (0.510s, 250.79/s) LR: 1.000e-05 Data: 0.000 (0.031)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.746 (0.746) Loss: 7.1175 (7.1175) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.337 (0.180) Loss: 7.0508 (7.2336) Acc@1: 0.0000 ( 0.0969) Acc@5: 0.0000 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0/20240401-180909-davit_large-224/checkpoint-0.pth.tar', 0.09689922480620156)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.39486561661823, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.97, "temperature": 72, "power": 328.5}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32722.375, 81920.0], "load": 0.96, "temperature": 73, "power": 303.835}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.413609504699707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31890.375, 81920.0], "load": 0.01, "temperature": 67, "power": 321.801}}}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.414 (7.41) Time: 0.935s, 136.85/s (0.935s, 136.85/s) LR: 1.008e-03 Data: 0.520 (0.520)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.087307929992676}, "pipe": "data"} +{"event": "data", "data": {"rate": 300.58974698564907, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.6153580329633, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022757530212402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 73, "power": 176.39}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.5108610073315, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.015746116638184}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.56027545021016, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0047407150268555}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.5202296717398, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 1.0, "temperature": 73, "power": 309.863}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.067532539367676}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.2052414773846, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943265914916992}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.5398211690538, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.4327739595213, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.032318115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 74, "power": 290.654}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.386511428409, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978374004364014}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.5561012248272, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.981762886047363}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.2314645400108, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32552.375, 81920.0], "load": 0.96, "temperature": 74, "power": 307.72}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0168962478637695}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.2420112752666, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 6.997 (7.05) Time: 0.413s, 310.23/s (0.437s, 293.00/s) LR: 1.008e-03 Data: 0.001 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.592 (0.592) Loss: 6.8691 (6.8691) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.166) Loss: 6.7061 (6.8682) Acc@1: 0.0000 ( 0.2665) Acc@5: 3.1250 ( 1.2597)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0/20240401-180909-davit_large-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.3580913258317, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.97, "temperature": 75, "power": 312.68}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0.94, "temperature": 75, "power": 326.032}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32796.375, 81920.0], "load": 0, "temperature": 69, "power": 85.303}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.885769844055176}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.886 (6.89) Time: 0.930s, 137.65/s (0.930s, 137.65/s) LR: 2.006e-03 Data: 0.517 (0.517)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 231.029010812849, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915477752685547}, "pipe": "data"} +{"event": "data", "data": {"rate": 295.60044077012515, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.99, "temperature": 74, "power": 320.625}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9320220947265625}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.59601130840196, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94178581237793}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.29165021060265, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.86341642258645, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9928107261657715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.98, "temperature": 75, "power": 262.896}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.61271479415643, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90703010559082}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.894587364301, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011904716491699}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.20028876827143, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.98, "temperature": 74, "power": 250.054}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895601749420166}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.75467381174604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.972679138183594}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.41428620234024, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.0864716848852, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.090322494506836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33040.375, 81920.0], "load": 0.96, "temperature": 74, "power": 236.523}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.9882886330373, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036557197570801}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 6.942 (6.95) Time: 0.415s, 308.24/s (0.438s, 292.44/s) LR: 2.006e-03 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 278.4411156975837, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.610 (0.610) Loss: 6.7541 (6.7541) Acc@1: 0.0000 ( 0.0000) Acc@5: 2.3438 ( 2.3438)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.4938 (6.8236) Acc@1: 0.0000 ( 0.2422) Acc@5: 6.2500 ( 1.0659)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 308.22790554346204, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.95, "temperature": 76, "power": 310.655}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.96, "temperature": 76, "power": 322.4}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33284.375, 81920.0], "load": 0.03, "temperature": 71, "power": 86.387}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.860050678253174}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 6.860 (6.86) Time: 0.930s, 137.67/s (0.930s, 137.67/s) LR: 3.004e-03 Data: 0.518 (0.518)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 260.2575818899566, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.851532459259033}, "pipe": "data"} +{"event": "data", "data": {"rate": 261.78508465716, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.99, "temperature": 75, "power": 273.589}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9167799949646}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.8692964675306, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.62976455503684, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.901655673980713}, "pipe": "data"} +{"event": "data", "data": {"rate": 308.2895393792282, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.913955211639404}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.99, "temperature": 74, "power": 271.981}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.4926494834154, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976900577545166}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.8656856574697, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906073093414307}, "pipe": "data"} +{"event": "data", "data": {"rate": 277.0089127293051, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.98, "temperature": 76, "power": 330.311}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998397350311279}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.20258615425126, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.48824631724403, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.037081718444824}, "pipe": "data"} +{"event": "data", "data": {"rate": 307.0750560728848, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 1.0, "temperature": 76, "power": 313.475}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968621253967285}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.05709050852306, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01994514465332}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.008 (6.96) Time: 0.417s, 306.63/s (0.437s, 293.04/s) LR: 3.004e-03 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 309.02794728041164, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.604 (0.604) Loss: 6.8300 (6.8300) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.043 (0.167) Loss: 6.3568 (6.8170) Acc@1: 0.0000 ( 0.0969) Acc@5: 9.3750 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 306.60039313931225, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.97, "temperature": 76, "power": 317.859}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.97, "temperature": 76, "power": 304.722}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33772.375, 81920.0], "load": 0.03, "temperature": 71, "power": 86.192}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.864753723144531}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 6.865 (6.86) Time: 0.930s, 137.59/s (0.930s, 137.59/s) LR: 4.002e-03 Data: 0.517 (0.517)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 291.1134548700849, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.826834678649902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.95, "temperature": 75, "power": 335.274}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 268.5363208701141, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8751139640808105}, "pipe": "data"} +{"event": "data", "data": {"rate": 289.7054354951768, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915192127227783}, "pipe": "data"} +{"event": "data", "data": {"rate": 275.3368766314093, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.99, "temperature": 76, "power": 304.422}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92592716217041}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.8451166847037, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 273.9832087399864, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.991674423217773}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.58986238248553, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.98, "temperature": 76, "power": 309.583}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.050677299499512}, "pipe": "data"} +{"event": "data", "data": {"rate": 274.35160874098074, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.078275680541992}, "pipe": "data"} +{"event": "data", "data": {"rate": 305.99311151654155, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011102676391602}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.5777965702307, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34016.375, 81920.0], "load": 0.98, "temperature": 76, "power": 296.268}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043510913848877}, "pipe": "data"} +{"event": "data", "data": {"rate": 306.0579623424577, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 276.16825176617175, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-davit_large.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "128", "--lr-base", "0.01", "--model", "davit_large", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0", "--checkpoint-hist", "1"], "time": 1711995052.112671, "return_code": -15}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/dlrm.0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/dlrm.0.data new file mode 100644 index 000000000..a1ece4291 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/dlrm.0.data @@ -0,0 +1,282 @@ +{"event": "config", "data": {"argv": {"--arch-embedding-size": "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--arch-interaction-op": "dot", "--arch-mlp-bot": "512-512-64", "--arch-mlp-top": "1024-1024-1024-1", "--arch-sparse-feature-size": 64, "--data-generation": "random", "--mini-batch-size": 16384, "--num-batches": 1000, "--num-indices-per-lookup": 100, "--numpy-rand-seed": "727", "--print-freq": 999999, "--test-mini-batch-size": 16384, "--test-num-workers": 0, "--use-gpu": true}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "dlrm", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "dlrm", "plan": {"method": "njobs", "n": 1}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["dlrm", "0"], "tags": ["nlp", "rl"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.03, "memory": 0.010771942138671876}, "temperature": 70, "power": 88.238, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711995178.227951, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--arch-interaction-op", "dot", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--data-generation", "random", "--mini-batch-size", "16384", "--num-batches", "1000", "--num-indices-per-lookup", "100", "--numpy-rand-seed", "727", "--print-freq", "999999", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1711995178.2452064}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "line", "data": "Unable to import mlperf_logging, No module named 'mlperf_logging'\n", "pipe": "stdout"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:347: UserWarning: torch.distributed.reduce_op is deprecated, please use torch.distributed.ReduceOp instead\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "world size: 1, current rank: 0, local rank: 0\n", "pipe": "stdout"} +{"event": "line", "data": "Using 1 GPU(s)...\n", "pipe": "stdout"} +{"event": "line", "data": "time/loss/accuracy (if enabled):\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 53.183}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 64, "power": 52.499}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [3298.375, 81920.0], "load": 0, "temperature": 64, "power": 79.898}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08395528793334961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5700.375, 81920.0], "load": 0, "temperature": 63, "power": 78.313}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08342313766479492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5702.375, 81920.0], "load": 0, "temperature": 62, "power": 77.93}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [5702.375, 81920.0], "load": 0, "temperature": 62, "power": 77.235}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0849374458193779}, "pipe": "data"} +{"event": "data", "data": {"rate": 415823.03075346834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6106.375, 81920.0], "load": 0, "temperature": 61, "power": 76.747}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08372959494590759}, "pipe": "data"} +{"event": "data", "data": {"rate": 420618.74958907906, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 60, "power": 76.15}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08384181559085846}, "pipe": "data"} +{"event": "data", "data": {"rate": 414945.17855938955, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 60, "power": 75.857}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08388040214776993}, "pipe": "data"} +{"event": "data", "data": {"rate": 418804.4849706857, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 59, "power": 75.161}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08414746820926666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 58, "power": 74.097}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 421618.5964681311, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6308.375, 81920.0], "load": 0, "temperature": 58, "power": 73.617}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08342362195253372}, "pipe": "data"} +{"event": "data", "data": {"rate": 416787.10815200635, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 57, "power": 73.5}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08308812975883484}, "pipe": "data"} +{"event": "data", "data": {"rate": 423158.48447483074, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 57, "power": 72.922}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08345107734203339}, "pipe": "data"} +{"event": "data", "data": {"rate": 418353.44978304434, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 56, "power": 72.629}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08459055423736572}, "pipe": "data"} +{"event": "data", "data": {"rate": 418499.7957310262, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0.17, "temperature": 56, "power": 71.641}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 55, "power": 71.543}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08355697244405746}, "pipe": "data"} +{"event": "data", "data": {"rate": 417787.61364784435, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 55, "power": 71.336}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08368614315986633}, "pipe": "data"} +{"event": "data", "data": {"rate": 423177.5566914964, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 54, "power": 71.042}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08400876820087433}, "pipe": "data"} +{"event": "data", "data": {"rate": 419203.72681938234, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 54, "power": 70.75}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08406656980514526}, "pipe": "data"} +{"event": "data", "data": {"rate": 416058.42289123783, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 54, "power": 70.554}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08438296616077423}, "pipe": "data"} +{"event": "data", "data": {"rate": 418660.3222319951, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 55, "power": 70.249}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 53, "power": 69.957}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08476312458515167}, "pipe": "data"} +{"event": "data", "data": {"rate": 414307.25545971945, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 52, "power": 69.957}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08354128897190094}, "pipe": "data"} +{"event": "data", "data": {"rate": 416872.92131256446, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6510.375, 81920.0], "load": 0, "temperature": 52, "power": 69.566}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08408722281455994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 52, "power": 68.87}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 416508.5227395502, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08347434550523758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0.01, "temperature": 53, "power": 81.003}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 416358.9420681157, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 51, "power": 69.468}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08404223620891571}, "pipe": "data"} +{"event": "data", "data": {"rate": 419545.68712718395, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 51, "power": 68.871}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08309965580701828}, "pipe": "data"} +{"event": "data", "data": {"rate": 422757.8737879901, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 51, "power": 68.87}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08461501449346542}, "pipe": "data"} +{"event": "data", "data": {"rate": 416555.39200242126, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 50, "power": 69.065}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08360610902309418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0.17, "temperature": 50, "power": 68.077}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 416479.8620776504, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 50, "power": 68.37}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08310449123382568}, "pipe": "data"} +{"event": "data", "data": {"rate": 413098.1669147046, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 49, "power": 68.37}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08348263800144196}, "pipe": "data"} +{"event": "data", "data": {"rate": 418912.6266744712, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 49, "power": 68.174}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08347773551940918}, "pipe": "data"} +{"event": "data", "data": {"rate": 418051.8696122143, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6712.375, 81920.0], "load": 0, "temperature": 49, "power": 67.999}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0839482992887497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 49, "power": 68.078}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 419175.70213657053, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 48, "power": 67.284}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08302360773086548}, "pipe": "data"} +{"event": "data", "data": {"rate": 418613.5663806294, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 48, "power": 66.606}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08247873932123184}, "pipe": "data"} +{"event": "data", "data": {"rate": 418216.0024390488, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 48, "power": 66.509}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08412615209817886}, "pipe": "data"} +{"event": "data", "data": {"rate": 421491.38903691224, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 48, "power": 66.509}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08320017158985138}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 48, "power": 66.49}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 415696.2267346869, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08296072483062744}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 47, "power": 66.216}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 424858.06579579576, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 47, "power": 66.296}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08382801711559296}, "pipe": "data"} +{"event": "data", "data": {"rate": 419379.1243597326, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 47, "power": 66.118}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08423185348510742}, "pipe": "data"} +{"event": "data", "data": {"rate": 423802.260032123, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [6916.375, 81920.0], "load": 0, "temperature": 47, "power": 65.923}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08362126350402832}, "pipe": "data"} +{"event": "data", "data": {"rate": 415462.61371526687, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 65.905}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08386819064617157}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.11, "temperature": 46, "power": 67.063}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 418893.6263560384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 65.813}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08274102210998535}, "pipe": "data"} +{"event": "data", "data": {"rate": 418541.7254219137, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 66.118}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08267556130886078}, "pipe": "data"} +{"event": "data", "data": {"rate": 420535.543184642, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 65.728}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0836000144481659}, "pipe": "data"} +{"event": "data", "data": {"rate": 417120.7815434034, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 46, "power": 65.032}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08373723179101944}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.08, "temperature": 45, "power": 65.618}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 416515.00290396175, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 65.404}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0839206874370575}, "pipe": "data"} +{"event": "data", "data": {"rate": 420930.6101644054, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 65.618}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08366947621107101}, "pipe": "data"} +{"event": "data", "data": {"rate": 420867.43130586325, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 65.422}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08206789195537567}, "pipe": "data"} +{"event": "data", "data": {"rate": 422567.86838702817, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 65.422}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08495134860277176}, "pipe": "data"} +{"event": "data", "data": {"rate": 413918.5676356051, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 65.728}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0824621394276619}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 45, "power": 65.227}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 418853.3286798619, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 65.227}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08413124084472656}, "pipe": "data"} +{"event": "data", "data": {"rate": 415626.70146863814, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 64.726}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08249908685684204}, "pipe": "data"} +{"event": "data", "data": {"rate": 418652.69467810245, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 64.727}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08293187618255615}, "pipe": "data"} +{"event": "data", "data": {"rate": 420076.7563100483, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 64.434}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08295343071222305}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.18, "temperature": 44, "power": 64.629}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 419200.2087813539, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 64.239}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08349806070327759}, "pipe": "data"} +{"event": "data", "data": {"rate": 415165.37488338683, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 64.727}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08397036790847778}, "pipe": "data"} +{"event": "data", "data": {"rate": 419396.01086917595, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 65.032}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08261168003082275}, "pipe": "data"} +{"event": "data", "data": {"rate": 420730.19646410475, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 64.629}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08328827470541}, "pipe": "data"} +{"event": "data", "data": {"rate": 418250.3477995178, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 44, "power": 65.13}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.629}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08417266607284546}, "pipe": "data"} +{"event": "data", "data": {"rate": 421778.1915240604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.532}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08396507054567337}, "pipe": "data"} +{"event": "data", "data": {"rate": 424591.3107792523, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.434}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08414334058761597}, "pipe": "data"} +{"event": "data", "data": {"rate": 414900.0157815973, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.629}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.0827147513628006}, "pipe": "data"} +{"event": "data", "data": {"rate": 422280.2851886814, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.532}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08464237302541733}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.434}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 419771.21392767277, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.434}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08371055126190186}, "pipe": "data"} +{"event": "data", "data": {"rate": 421941.5259196974, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 63.933}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08391377329826355}, "pipe": "data"} +{"event": "data", "data": {"rate": 418315.2745509309, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 63.933}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08393114805221558}, "pipe": "data"} +{"event": "data", "data": {"rate": 418508.5508801694, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.239}}}, "pipe": "data"} +{"event": "data", "data": {"loss": 0.08339859545230865}, "pipe": "data"} +{"event": "data", "data": {"rate": 419426.18023708585, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0.1, "temperature": 43, "power": 64.531}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.531}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [7120.375, 81920.0], "load": 0, "temperature": 43, "power": 64.532}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/dlrm/voirconf-dlrm.0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/dlrm/dlrm/dlrm_s_pytorch.py", "--arch-embedding-size", "1000000-1000000-1000000-1000000-1000000-1000000-1000000-1000000", "--arch-interaction-op", "dot", "--arch-mlp-bot", "512-512-64", "--arch-mlp-top", "1024-1024-1024-1", "--arch-sparse-feature-size", "64", "--data-generation", "random", "--mini-batch-size", "16384", "--num-batches", "1000", "--num-indices-per-lookup", "100", "--numpy-rand-seed", "727", "--print-freq", "999999", "--test-mini-batch-size", "16384", "--test-num-workers", "0", "--use-gpu"], "time": 1711995426.7475517, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0.data new file mode 100644 index 000000000..febc8cb70 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0.data @@ -0,0 +1,264 @@ +{"event": "config", "data": {"argv": {"--amp": true, "--model": "focalnet_base_lrf"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "timm", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "focalnet", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["focalnet", "D0"], "tags": ["classification", "convnet", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 2.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 43, "power": 68.285, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711995429.442004, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0", "--checkpoint-hist", "1"], "time": 1711995429.458921}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model focalnet_base_lrf created, param count:88749768\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.9\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.05) calculated from base learning rate (0.1) and global batch size (128) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.004467010498047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4344.375, 81920.0], "load": 1.0, "temperature": 47, "power": 229.603}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/32 ( 0%)] Loss: 7.004 (7.00) Time: 11.494s, 11.14/s (11.494s, 11.14/s) LR: 1.000e-05 Data: 0.711 (0.711)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [21986.375, 81920.0], "load": 1.0, "temperature": 46, "power": 124.33}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [16646.375, 81920.0], "load": 1.0, "temperature": 48, "power": 180.027}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [14424.375, 81920.0], "load": 1.0, "temperature": 48, "power": 164.248}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.006728649139404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935497760772705}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23792.375, 81920.0], "load": 0.99, "temperature": 52, "power": 302.287}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995425701141357}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.060293197631836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056240081787109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23796.375, 81920.0], "load": 0.95, "temperature": 53, "power": 258.593}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 379.210060452384, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.029513359069824}, "pipe": "data"} +{"event": "data", "data": {"rate": 367.35666213001923, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975472450256348}, "pipe": "data"} +{"event": "data", "data": {"rate": 387.7241667121378, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23796.375, 81920.0], "load": 0.95, "temperature": 54, "power": 311.737}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.069399833679199}, "pipe": "data"} +{"event": "data", "data": {"rate": 394.6147537037038, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 31/32 (100%)] Loss: 7.005 (7.00) Time: 0.317s, 403.56/s (0.674s, 189.83/s) LR: 1.000e-05 Data: 0.000 (0.030)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.691 (0.691) Loss: 6.9615 (6.9615) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.984 (0.158) Loss: 6.8640 (6.9459) Acc@1: 0.0000 ( 0.1453) Acc@5: 3.1250 ( 0.6541)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0/20240401-181714-focalnet_base_lrf-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 402.510907906717, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24040.375, 81920.0], "load": 0.79, "temperature": 55, "power": 307.711}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24040.375, 81920.0], "load": 0.96, "temperature": 56, "power": 310.584}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.020679473876953}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/32 ( 0%)] Loss: 7.021 (7.02) Time: 0.850s, 150.58/s (0.850s, 150.58/s) LR: 1.001e-02 Data: 0.530 (0.530)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 334.07324467151216, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23912.375, 81920.0], "load": 0.98, "temperature": 56, "power": 295.282}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.986514091491699}, "pipe": "data"} +{"event": "data", "data": {"rate": 383.0743808212909, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.065659523010254}, "pipe": "data"} +{"event": "data", "data": {"rate": 387.1175470303891, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23916.375, 81920.0], "load": 0.99, "temperature": 57, "power": 285.376}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 396.1337576333015, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07792854309082}, "pipe": "data"} +{"event": "data", "data": {"rate": 378.6523877482334, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.056331634521484}, "pipe": "data"} +{"event": "data", "data": {"rate": 365.3493052378703, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036350250244141}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23916.375, 81920.0], "load": 0.95, "temperature": 58, "power": 288.664}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.90933663280975, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.098367214202881}, "pipe": "data"} +{"event": "data", "data": {"rate": 391.64305161905696, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 378.2027158874914, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.144101142883301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23916.375, 81920.0], "load": 0.95, "temperature": 59, "power": 280.276}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 367.51007012046193, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 31/32 (100%)] Loss: 7.108 (7.05) Time: 0.317s, 403.23/s (0.341s, 375.03/s) LR: 1.001e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.571 (0.571) Loss: 6.8922 (6.8922) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.030 (0.127) Loss: 6.9392 (6.9699) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 0.9932)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0/20240401-181714-focalnet_base_lrf-224/checkpoint-1.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 402.7932877450317, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24160.375, 81920.0], "load": 0.93, "temperature": 60, "power": 320.082}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24160.375, 81920.0], "load": 0.03, "temperature": 56, "power": 72.219}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995892524719238}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/32 ( 0%)] Loss: 6.996 (7.00) Time: 0.843s, 151.79/s (0.843s, 151.79/s) LR: 2.001e-02 Data: 0.525 (0.525)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 368.1823715209601, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097538948059082}, "pipe": "data"} +{"event": "data", "data": {"rate": 390.0060983014103, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24404.375, 81920.0], "load": 0.99, "temperature": 61, "power": 303.562}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0097575187683105}, "pipe": "data"} +{"event": "data", "data": {"rate": 384.1133568450786, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.010951519012451}, "pipe": "data"} +{"event": "data", "data": {"rate": 377.8335115005289, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 363.82561704014563, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.23178768157959}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24404.375, 81920.0], "load": 0.95, "temperature": 62, "power": 284.086}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.252410411834717}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.30400713952525, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 385.3307818561956, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.190232276916504}, "pipe": "data"} +{"event": "data", "data": {"rate": 379.7666706652603, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24404.375, 81920.0], "load": 0.95, "temperature": 63, "power": 308.907}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.219778537750244}, "pipe": "data"} +{"event": "data", "data": {"rate": 362.5945532575905, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.261171340942383}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 31/32 (100%)] Loss: 7.261 (7.13) Time: 0.319s, 400.92/s (0.342s, 374.26/s) LR: 2.001e-02 Data: 0.000 (0.023)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.549 (0.549) Loss: 6.9301 (6.9301) Acc@1: 0.0000 ( 0.0000) Acc@5: 6.2500 ( 6.2500)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.030 (0.126) Loss: 6.7456 (7.1150) Acc@1: 0.0000 ( 0.1696) Acc@5: 0.0000 ( 0.8479)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 401.46855598176853, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24648.375, 81920.0], "load": 0.52, "temperature": 62, "power": 302.957}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24648.375, 81920.0], "load": 0.96, "temperature": 64, "power": 313.843}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.089275360107422}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/32 ( 0%)] Loss: 7.089 (7.09) Time: 0.881s, 145.25/s (0.881s, 145.25/s) LR: 3.000e-02 Data: 0.564 (0.564)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 337.765767530763, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2314229011535645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24892.375, 81920.0], "load": 0.98, "temperature": 64, "power": 327.532}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 370.2530617872052, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.042392730712891}, "pipe": "data"} +{"event": "data", "data": {"rate": 388.7907823695308, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 386.32675992415574, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.210953712463379}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24892.375, 81920.0], "load": 0.96, "temperature": 64, "power": 299.78}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 382.3681464312274, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.373247146606445}, "pipe": "data"} +{"event": "data", "data": {"rate": 368.392645499021, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.324443817138672}, "pipe": "data"} +{"event": "data", "data": {"rate": 391.8792267982421, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24892.375, 81920.0], "load": 0.95, "temperature": 65, "power": 305.142}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.433285713195801}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.580549503213, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.24461667584785, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.273014068603516}, "pipe": "data"} +{"event": "data", "data": {"rate": 378.8654229457391, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 31/32 (100%)] Loss: 7.239 (7.23) Time: 0.320s, 400.57/s (0.344s, 372.14/s) LR: 3.000e-02 Data: 0.000 (0.025)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.547 (0.547) Loss: 7.1708 (7.1708) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.7812 ( 0.7812)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.126) Loss: 6.2410 (7.1746) Acc@1: 0.0000 ( 0.2180) Acc@5: 25.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24892.375, 81920.0], "load": 0.99, "temperature": 65, "power": 221.044}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 400.92659335240495, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25136.375, 81920.0], "load": 0.94, "temperature": 66, "power": 271.083}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25136.375, 81920.0], "load": 0, "temperature": 61, "power": 76.65}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.215184211730957}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/32 ( 0%)] Loss: 7.215 (7.22) Time: 0.851s, 150.43/s (0.851s, 150.43/s) LR: 4.000e-02 Data: 0.532 (0.532)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25306.375, 81920.0], "load": 0.52, "temperature": 64, "power": 304.058}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 358.70955515436015, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.294953346252441}, "pipe": "data"} +{"event": "data", "data": {"rate": 387.2059217660317, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.311878681182861}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.6306104942517, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25380.375, 81920.0], "load": 0.95, "temperature": 66, "power": 266.198}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.18798828125}, "pipe": "data"} +{"event": "data", "data": {"rate": 381.7221027444743, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.3718146969613, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4081525802612305}, "pipe": "data"} +{"event": "data", "data": {"rate": 365.6822526454916, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25380.375, 81920.0], "load": 0.99, "temperature": 66, "power": 272.963}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.32265567779541}, "pipe": "data"} +{"event": "data", "data": {"rate": 395.4273115218374, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.463779449462891}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.5113944951221, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.423940658569336}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.5171127875226, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25380.375, 81920.0], "load": 0.96, "temperature": 67, "power": 311.416}}}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 31/32 (100%)] Loss: 7.405 (7.33) Time: 0.319s, 401.18/s (0.343s, 372.92/s) LR: 4.000e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 376.7717104658269, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.577 (0.577) Loss: 6.9678 (6.9678) Acc@1: 0.0000 ( 0.0000) Acc@5: 4.6875 ( 4.6875)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.128) Loss: 6.8193 (7.2572) Acc@1: 0.0000 ( 0.1211) Acc@5: 3.1250 ( 0.7025)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 401.10304339691766, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25624.375, 81920.0], "load": 0.94, "temperature": 67, "power": 319.402}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25624.375, 81920.0], "load": 0.03, "temperature": 63, "power": 78.618}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.209873676300049}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/32 ( 0%)] Loss: 7.210 (7.21) Time: 0.852s, 150.19/s (0.852s, 150.19/s) LR: 4.997e-02 Data: 0.532 (0.532)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 375.99030706388146, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.401329040527344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25868.375, 81920.0], "load": 1.0, "temperature": 67, "power": 199.522}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 364.573381787427, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.457070350646973}, "pipe": "data"} +{"event": "data", "data": {"rate": 391.07174558478073, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.710537910461426}, "pipe": "data"} +{"event": "data", "data": {"rate": 376.8235366760082, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25868.375, 81920.0], "load": 0.95, "temperature": 68, "power": 296.72}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.548340320587158}, "pipe": "data"} +{"event": "data", "data": {"rate": 386.187715950652, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 375.66394162684793, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.613376617431641}, "pipe": "data"} +{"event": "data", "data": {"rate": 374.4184988198875, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.288519859313965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25868.375, 81920.0], "load": 0.95, "temperature": 69, "power": 296.747}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 389.0179343604477, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.529942512512207}, "pipe": "data"} +{"event": "data", "data": {"rate": 380.01434870845, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 31/32 (100%)] Loss: 7.194 (7.44) Time: 0.320s, 400.31/s (0.344s, 372.17/s) LR: 4.997e-02 Data: 0.000 (0.024)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/32] Time: 0.555 (0.555) Loss: 7.3009 (7.3009) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 32/32] Time: 0.031 (0.127) Loss: 6.1908 (7.2211) Acc@1: 0.0000 ( 0.2180) Acc@5: 21.8750 ( 0.9932)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 399.7605829704417, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26112.375, 81920.0], "load": 0.52, "temperature": 68, "power": 303.609}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26112.375, 81920.0], "load": 0.97, "temperature": 70, "power": 256.329}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.066534042358398}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/32 ( 0%)] Loss: 7.067 (7.07) Time: 0.866s, 147.73/s (0.866s, 147.73/s) LR: 4.995e-02 Data: 0.546 (0.546)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 348.918121030995, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [26356.375, 81920.0], "load": 0.94, "temperature": 69, "power": 294.708}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.062187194824219}, "pipe": "data"} +{"event": "data", "data": {"rate": 371.44209833274886, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 378.73679325569196, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-focalnet.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--model", "focalnet_base_lrf", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0", "--checkpoint-hist", "1"], "time": 1711995549.6997116, "return_code": -15}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/fp16.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/fp16.D0.data new file mode 100644 index 000000000..cab9707c5 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/fp16.D0.data @@ -0,0 +1,142 @@ +{"event": "config", "data": {"argv": {"--dtype": "fp16", "--m": 8192, "--n": 8192, "--number": 30, "--repeat": 90}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "flops", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "fp16", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["fp16", "D0"], "tags": ["diagnostic", "flops"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.1, "memory": 0.010771942138671876}, "temperature": 66, "power": 84.901, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711995552.369801, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "fp16", "--m", "8192", "--n", "8192", "--number", "30", "--repeat", "90"], "time": 1711995552.380318}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 219.27240309947274, "units": "Tflops", "t": 1711995554.3533254}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 62, "power": 52.083}}, "t": 1711995553.6909907}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 0.01, "temperature": 66, "power": 79.802}}, "t": 1711995554.1998236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 256.6377361675375, "units": "Tflops", "t": 1711995554.611047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.64465151012828, "units": "Tflops", "t": 1711995554.8732636}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 261.021}}, "t": 1711995554.7097375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.69778414317645, "units": "Tflops", "t": 1711995555.1365547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.34412147400172, "units": "Tflops", "t": 1711995555.4011881}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 67, "power": 215.561}}, "t": 1711995555.2250934}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.22292612519328, "units": "Tflops", "t": 1711995555.6628773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.9228473349695, "units": "Tflops", "t": 1711995555.9217365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 260.246}}, "t": 1711995555.7336102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.7069603706751, "units": "Tflops", "t": 1711995556.1808515}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.77372054892967, "units": "Tflops", "t": 1711995556.4408658}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 303.853}}, "t": 1711995556.2411973}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.12120209648864, "units": "Tflops", "t": 1711995556.7036784}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.61377120575176, "units": "Tflops", "t": 1711995556.9669683}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 267.571}}, "t": 1711995556.7487803}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.121576491874, "units": "Tflops", "t": 1711995557.2287292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.40065416241973, "units": "Tflops", "t": 1711995557.4880996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 210.026}}, "t": 1711995557.255849}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.82237379856227, "units": "Tflops", "t": 1711995557.7481294}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.53328914369266, "units": "Tflops", "t": 1711995558.0083938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 265.032}}, "t": 1711995557.7634885}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.6653948302637, "units": "Tflops", "t": 1711995558.2727475}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.3977283495343, "units": "Tflops", "t": 1711995558.535224}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 303.168}}, "t": 1711995558.2728279}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.7350825578827, "units": "Tflops", "t": 1711995558.797396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 260.333}}, "t": 1711995558.78175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.1628397538496, "units": "Tflops", "t": 1711995559.0580864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.23629884534725, "units": "Tflops", "t": 1711995559.316628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 225.201}}, "t": 1711995559.2898042}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.57116055344014, "units": "Tflops", "t": 1711995559.5769355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.61330199074962, "units": "Tflops", "t": 1711995559.8392031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 270.117}}, "t": 1711995559.7981925}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.8036752107334, "units": "Tflops", "t": 1711995560.102367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.73458591872796, "units": "Tflops", "t": 1711995560.365531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 311.429}}, "t": 1711995560.3074045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.73627137045446, "units": "Tflops", "t": 1711995560.6246111}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.9630359342503, "units": "Tflops", "t": 1711995560.882407}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 273.511}}, "t": 1711995560.8160634}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.72904117567958, "units": "Tflops", "t": 1711995561.1425326}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.898366820498, "units": "Tflops", "t": 1711995561.4034529}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 285.633}}, "t": 1711995561.3249464}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.35666366027212, "units": "Tflops", "t": 1711995561.661924}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.55048089687335, "units": "Tflops", "t": 1711995561.9221878}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 260.15}}, "t": 1711995561.835052}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.1251876823212, "units": "Tflops", "t": 1711995562.18189}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.9705593423672, "units": "Tflops", "t": 1711995562.4458606}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 310.556}}, "t": 1711995562.343462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.5202878985205, "units": "Tflops", "t": 1711995562.7093174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.59934595869066, "units": "Tflops", "t": 1711995562.9715755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 268.346}}, "t": 1711995562.8543684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.86625896738911, "units": "Tflops", "t": 1711995563.23052}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.64877100571167, "units": "Tflops", "t": 1711995563.4927402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 299.869}}, "t": 1711995563.363124}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.07061715352563, "units": "Tflops", "t": 1711995563.7556276}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.69242538866973, "units": "Tflops", "t": 1711995564.0209625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 272.467}}, "t": 1711995563.8703065}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.08804104238766, "units": "Tflops", "t": 1711995564.282771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.14921704065338, "units": "Tflops", "t": 1711995564.5413923}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 300.06}}, "t": 1711995564.3776746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.8165686034052, "units": "Tflops", "t": 1711995564.8024566}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.92194562214084, "units": "Tflops", "t": 1711995565.0633557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 234.52}}, "t": 1711995564.8856928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.14184990796525, "units": "Tflops", "t": 1711995565.327206}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.2738127613272, "units": "Tflops", "t": 1711995565.5919197}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 236.99}}, "t": 1711995565.393252}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.0088317528368, "units": "Tflops", "t": 1711995565.8538082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.04445998623123, "units": "Tflops", "t": 1711995566.1135447}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 68, "power": 308.217}}, "t": 1711995565.9007244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.58417179818116, "units": "Tflops", "t": 1711995566.3748286}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.59515974413796, "units": "Tflops", "t": 1711995566.6381462}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 269.145}}, "t": 1711995566.410881}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.54699971313764, "units": "Tflops", "t": 1711995566.9036863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.89420631185763, "units": "Tflops", "t": 1711995567.1646016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 299.648}}, "t": 1711995566.9191437}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.73380238213443, "units": "Tflops", "t": 1711995567.422684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.8098699643978, "units": "Tflops", "t": 1711995567.6836996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 295.561}}, "t": 1711995567.427273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 246.99859237783866, "units": "Tflops", "t": 1711995567.950911}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 281.457}}, "t": 1711995567.9358552}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.5922031454108, "units": "Tflops", "t": 1711995568.2153273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.66757701029, "units": "Tflops", "t": 1711995568.474431}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 299.662}}, "t": 1711995568.4447181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.6283376053472, "units": "Tflops", "t": 1711995568.734646}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.74336481177073, "units": "Tflops", "t": 1711995568.9957182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 277.661}}, "t": 1711995568.9523091}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.56520563614797, "units": "Tflops", "t": 1711995569.2591097}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 247.64453278492272, "units": "Tflops", "t": 1711995569.5255542}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 294.899}}, "t": 1711995569.462289}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.69687559402612, "units": "Tflops", "t": 1711995569.7888184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.59844249626653, "units": "Tflops", "t": 1711995570.046984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 234.84}}, "t": 1711995569.9709666}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.62222144627057, "units": "Tflops", "t": 1711995570.308227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.40412394220664, "units": "Tflops", "t": 1711995570.5706923}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 243.261}}, "t": 1711995570.479247}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.61377120575176, "units": "Tflops", "t": 1711995570.8340292}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.48853759995805, "units": "Tflops", "t": 1711995571.0974717}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 295.282}}, "t": 1711995570.9877858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 258.5506471267017, "units": "Tflops", "t": 1711995571.3527477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.6789847616548, "units": "Tflops", "t": 1711995571.6149256}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 278.612}}, "t": 1711995571.5057828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.54973414120715, "units": "Tflops", "t": 1711995571.8741915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.66452999602706, "units": "Tflops", "t": 1711995572.1332922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 292.932}}, "t": 1711995572.0136158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.49426787062345, "units": "Tflops", "t": 1711995572.3936658}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.8386889229332, "units": "Tflops", "t": 1711995572.6567307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 230.515}}, "t": 1711995572.520799}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.62717501198946, "units": "Tflops", "t": 1711995572.922189}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.06127711862644, "units": "Tflops", "t": 1711995573.18501}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 265.825}}, "t": 1711995573.0279853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.2113382242795, "units": "Tflops", "t": 1711995573.444622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.25870467068927, "units": "Tflops", "t": 1711995573.707242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 301.917}}, "t": 1711995573.5355964}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.3568600231136, "units": "Tflops", "t": 1711995573.9677463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 247.95410592680702, "units": "Tflops", "t": 1711995574.2338696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 273.736}}, "t": 1711995574.043394}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.27785499931377, "units": "Tflops", "t": 1711995574.4986176}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.25034696770132, "units": "Tflops", "t": 1711995574.758142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 69, "power": 223.647}}, "t": 1711995574.551932}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 253.87966421013044, "units": "Tflops", "t": 1711995575.0181131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.79585210819735, "units": "Tflops", "t": 1711995575.277081}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 272.471}}, "t": 1711995575.0597546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.2669351503893, "units": "Tflops", "t": 1711995575.536656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.16423896474143, "units": "Tflops", "t": 1711995575.8004344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 304.041}}, "t": 1711995575.567736}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 251.77264771052845, "units": "Tflops", "t": 1711995576.062575}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 252.5465946106977, "units": "Tflops", "t": 1711995576.3238597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 71, "power": 267.089}}, "t": 1711995576.075545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 255.645436055781, "units": "Tflops", "t": 1711995576.5820327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 254.6089933607264, "units": "Tflops", "t": 1711995576.8412104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 235.11}}, "t": 1711995576.583713}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 250.38608440667096, "units": "Tflops", "t": 1711995577.1048021}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 288.771}}, "t": 1711995577.0928223}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 248.8899492828305, "units": "Tflops", "t": 1711995577.3699596}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 249.1770627538527, "units": "Tflops", "t": 1711995577.6347787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1804.375, 81920.0], "load": 1.0, "temperature": 70, "power": 305.016}}, "t": 1711995577.6017532}, "pipe": "data"} +{"event": "end", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "fp16", "--m", "8192", "--n", "8192", "--number", "30", "--repeat", "90"], "time": 1711995578.5540898, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/fp32.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/fp32.D0.data new file mode 100644 index 000000000..2f5216628 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/fp32.D0.data @@ -0,0 +1,299 @@ +{"event": "config", "data": {"argv": {"--dtype": "fp32", "--m": 8192, "--n": 8192, "--number": 10, "--repeat": 90}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "flops", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "fp32", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["fp32", "D0"], "tags": ["diagnostic", "flops"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 65, "power": 84.51, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711995581.225396, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "fp32", "--m", "8192", "--n", "8192", "--number", "10", "--repeat", "90"], "time": 1711995581.2349355}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 18.85672317769435, "units": "Tflops", "t": 1711995584.095919}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 63, "power": 52.193}}, "t": 1711995582.5631711}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0.02, "temperature": 66, "power": 285.457}}, "t": 1711995583.0742717}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 69, "power": 272.567}}, "t": 1711995583.5847347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 279.114}}, "t": 1711995584.0925946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18860230277818, "units": "Tflops", "t": 1711995585.2426226}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 286.407}}, "t": 1711995584.6003432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 286.715}}, "t": 1711995585.106878}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188722065297227, "units": "Tflops", "t": 1711995586.3888028}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 288.256}}, "t": 1711995585.613426}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 282.349}}, "t": 1711995586.1201832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188738033746052, "units": "Tflops", "t": 1711995587.535}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 290.327}}, "t": 1711995586.629493}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 280.182}}, "t": 1711995587.1361732}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.1889935325418, "units": "Tflops", "t": 1711995588.6811514}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 70, "power": 294.707}}, "t": 1711995587.645185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 279.116}}, "t": 1711995588.1544607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 279.229}}, "t": 1711995588.6611714}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188765978595445, "units": "Tflops", "t": 1711995589.827344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 281.246}}, "t": 1711995589.1691022}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 290.524}}, "t": 1711995589.6757162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188722065297227, "units": "Tflops", "t": 1711995590.9735332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 283.595}}, "t": 1711995590.183738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 289.43}}, "t": 1711995590.6912858}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188658191767697, "units": "Tflops", "t": 1711995592.1196976}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 289.15}}, "t": 1711995591.1978714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 283.894}}, "t": 1711995591.705647}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188606294838063, "units": "Tflops", "t": 1711995593.2658696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 294.212}}, "t": 1711995592.2133088}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 292.367}}, "t": 1711995592.7235408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 281.544}}, "t": 1711995593.233829}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18865419968622, "units": "Tflops", "t": 1711995594.4121222}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 296.361}}, "t": 1711995593.742996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 71, "power": 279.997}}, "t": 1711995594.2520812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188462581728725, "units": "Tflops", "t": 1711995595.5583475}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 293.324}}, "t": 1711995594.7612529}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 292.079}}, "t": 1711995595.2678518}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18858234250368, "units": "Tflops", "t": 1711995596.704563}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 283.987}}, "t": 1711995595.7756255}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 287.39}}, "t": 1711995596.2821927}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18865419968622, "units": "Tflops", "t": 1711995597.850736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 285.545}}, "t": 1711995596.7898185}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 290.901}}, "t": 1711995597.2964518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 283.788}}, "t": 1711995597.8038597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188590326608495, "units": "Tflops", "t": 1711995598.9969845}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 294.117}}, "t": 1711995598.3104599}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 283.785}}, "t": 1711995598.8201141}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188694120575736, "units": "Tflops", "t": 1711995600.1431682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 295.295}}, "t": 1711995599.3310628}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 72, "power": 281.458}}, "t": 1711995599.8402288}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188857797959393, "units": "Tflops", "t": 1711995601.2893202}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 282.225}}, "t": 1711995600.3493347}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 294.594}}, "t": 1711995600.8602152}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18882186853842, "units": "Tflops", "t": 1711995602.435481}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 293.43}}, "t": 1711995601.366823}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 285.44}}, "t": 1711995601.8738499}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 292.848}}, "t": 1711995602.3817515}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188769970723435, "units": "Tflops", "t": 1711995603.5816789}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.863}}, "t": 1711995602.8893883}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 285.839}}, "t": 1711995603.398394}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18882186853842, "units": "Tflops", "t": 1711995604.7278268}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 292.065}}, "t": 1711995603.9049704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.374}}, "t": 1711995604.4133377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188881750981462, "units": "Tflops", "t": 1711995605.873989}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 284.959}}, "t": 1711995604.920018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.389}}, "t": 1711995605.4302297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188885743157616, "units": "Tflops", "t": 1711995607.020149}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 283.196}}, "t": 1711995605.9368165}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 298.985}}, "t": 1711995606.4442718}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 283.213}}, "t": 1711995606.95504}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188578350453767, "units": "Tflops", "t": 1711995608.1663568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 73, "power": 284.56}}, "t": 1711995607.4627156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 286.806}}, "t": 1711995607.9704804}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188650207606404, "units": "Tflops", "t": 1711995609.3125634}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.252}}, "t": 1711995608.4782507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.278}}, "t": 1711995608.985873}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188853805794864, "units": "Tflops", "t": 1711995610.4587553}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 291.285}}, "t": 1711995609.49358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 292.462}}, "t": 1711995610.0001874}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188530445984323, "units": "Tflops", "t": 1711995611.6049547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.677}}, "t": 1711995610.5088835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 288.37}}, "t": 1711995611.0154703}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 294.417}}, "t": 1711995611.5229728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188845821470785, "units": "Tflops", "t": 1711995612.7511482}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 296.952}}, "t": 1711995612.0295527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 285.538}}, "t": 1711995612.53737}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188702104773565, "units": "Tflops", "t": 1711995613.8973184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 287.21}}, "t": 1711995613.0449836}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 293.827}}, "t": 1711995613.5534232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188865782293437, "units": "Tflops", "t": 1711995615.0434825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 290.315}}, "t": 1711995614.0609398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 291.595}}, "t": 1711995614.5675347}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18871008897805, "units": "Tflops", "t": 1711995616.1896799}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 74, "power": 294.395}}, "t": 1711995615.0741148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 289.916}}, "t": 1711995615.5832317}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 295.291}}, "t": 1711995616.092065}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18831487883176, "units": "Tflops", "t": 1711995617.3359013}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 286.434}}, "t": 1711995616.6025324}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 285.634}}, "t": 1711995617.1091194}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18740875136629, "units": "Tflops", "t": 1711995618.4821758}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 285.836}}, "t": 1711995617.6174824}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 297.041}}, "t": 1711995618.1241047}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.187584381721575, "units": "Tflops", "t": 1711995619.6284356}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 284.774}}, "t": 1711995618.632088}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 300.629}}, "t": 1711995619.1387532}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18760833156459, "units": "Tflops", "t": 1711995620.7746632}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 285.941}}, "t": 1711995619.6468706}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.452}}, "t": 1711995620.153492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 296.355}}, "t": 1711995620.6603007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188406693878672, "units": "Tflops", "t": 1711995621.9208825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 290.507}}, "t": 1711995621.1668994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 294.995}}, "t": 1711995621.6734757}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188773962853084, "units": "Tflops", "t": 1711995623.0670712}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 294.705}}, "t": 1711995622.184044}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 294.911}}, "t": 1711995622.6906343}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188694120575736, "units": "Tflops", "t": 1711995624.2132714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 288.369}}, "t": 1711995623.1995056}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 300.537}}, "t": 1711995623.707098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18743270077087, "units": "Tflops", "t": 1711995625.3595374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 286.813}}, "t": 1711995624.2146661}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 286.997}}, "t": 1711995624.723322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.369}}, "t": 1711995625.2309868}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.185684551341375, "units": "Tflops", "t": 1711995626.5059168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 286.912}}, "t": 1711995625.7394037}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.549}}, "t": 1711995626.2460592}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18858234250368, "units": "Tflops", "t": 1711995627.6521072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 290.613}}, "t": 1711995626.75361}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.086}}, "t": 1711995627.2601807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188674160110214, "units": "Tflops", "t": 1711995628.798269}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.036}}, "t": 1711995627.769319}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.149}}, "t": 1711995628.278594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.47}}, "t": 1711995628.7851918}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18856637431398, "units": "Tflops", "t": 1711995629.9444869}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 289.615}}, "t": 1711995629.293368}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 300.544}}, "t": 1711995629.8013697}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188642223451758, "units": "Tflops", "t": 1711995631.0906923}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 287.499}}, "t": 1711995630.3090215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 300.823}}, "t": 1711995630.8177576}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188486533764124, "units": "Tflops", "t": 1711995632.236902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 288.159}}, "t": 1711995631.324327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 301.307}}, "t": 1711995631.8327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.187604339919936, "units": "Tflops", "t": 1711995633.3831384}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 300.337}}, "t": 1711995632.3393326}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 290.125}}, "t": 1711995632.849796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 301.804}}, "t": 1711995633.360002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.187756023584356, "units": "Tflops", "t": 1711995634.5293982}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 290.888}}, "t": 1711995633.8690941}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 297.327}}, "t": 1711995634.3808224}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188546414114228, "units": "Tflops", "t": 1711995635.6755722}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.325}}, "t": 1711995634.89142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.758}}, "t": 1711995635.3979595}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188506493839302, "units": "Tflops", "t": 1711995636.8217506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.246}}, "t": 1711995635.904522}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 292.743}}, "t": 1711995636.4122052}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188334838549824, "units": "Tflops", "t": 1711995637.9679415}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 294.616}}, "t": 1711995636.918798}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 289.634}}, "t": 1711995637.4265125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 303.448}}, "t": 1711995637.9338844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188346814400592, "units": "Tflops", "t": 1711995639.1141727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 288.562}}, "t": 1711995638.4423072}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 301.315}}, "t": 1711995638.9489157}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188526453956, "units": "Tflops", "t": 1711995640.2603815}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.975}}, "t": 1711995639.457879}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 288.946}}, "t": 1711995639.9644506}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188510485859318, "units": "Tflops", "t": 1711995641.4065664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 299.841}}, "t": 1711995640.4742594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 293.916}}, "t": 1711995640.983512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18848254175407, "units": "Tflops", "t": 1711995642.5527747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.28}}, "t": 1711995641.4912848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.953}}, "t": 1711995641.9993787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 298.775}}, "t": 1711995642.5082777}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188462581728725, "units": "Tflops", "t": 1711995643.6990297}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 297.619}}, "t": 1711995643.014855}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 292.543}}, "t": 1711995643.5234427}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18848254175407, "units": "Tflops", "t": 1711995644.845231}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.765}}, "t": 1711995644.0317478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 288.869}}, "t": 1711995644.5400329}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.1883667741851, "units": "Tflops", "t": 1711995645.9914536}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.985}}, "t": 1711995645.0466225}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 290.31}}, "t": 1711995645.5569143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.177554655499463, "units": "Tflops", "t": 1711995647.138341}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.871}}, "t": 1711995646.0670986}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 289.323}}, "t": 1711995646.5762045}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 291.57}}, "t": 1711995647.085309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18842266180261, "units": "Tflops", "t": 1711995648.2845917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 292.165}}, "t": 1711995647.5935194}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.891}}, "t": 1711995648.1001153}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188406693878672, "units": "Tflops", "t": 1711995649.4308054}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 295.279}}, "t": 1711995648.6094964}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 294.612}}, "t": 1711995649.116072}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188466573730473, "units": "Tflops", "t": 1711995650.5770063}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 298.488}}, "t": 1711995649.6246514}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 292.946}}, "t": 1711995650.1312373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18824701610095, "units": "Tflops", "t": 1711995651.7232409}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.542}}, "t": 1711995650.6392229}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 302.286}}, "t": 1711995651.1458302}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 289.248}}, "t": 1711995651.6565373}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188486533764124, "units": "Tflops", "t": 1711995652.869503}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 304.908}}, "t": 1711995652.1631515}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 291.287}}, "t": 1711995652.669723}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188350806354173, "units": "Tflops", "t": 1711995654.0157206}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.309}}, "t": 1711995653.1814837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.657}}, "t": 1711995653.6880734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18847854974568, "units": "Tflops", "t": 1711995655.1619284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 295.371}}, "t": 1711995654.1947448}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 296.25}}, "t": 1711995654.7039485}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18842266180261, "units": "Tflops", "t": 1711995656.3081472}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 298.288}}, "t": 1711995655.2116754}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 299.667}}, "t": 1711995655.720235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 293.326}}, "t": 1711995656.2276158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188430645774545, "units": "Tflops", "t": 1711995657.4543633}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.731}}, "t": 1711995656.7360048}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 291.395}}, "t": 1711995657.242613}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18849451778921, "units": "Tflops", "t": 1711995658.6005497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.084}}, "t": 1711995657.750605}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.845}}, "t": 1711995658.2571986}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188502501820942, "units": "Tflops", "t": 1711995659.746755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 291.286}}, "t": 1711995658.7663784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 78, "power": 303.839}}, "t": 1711995659.2756538}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188622263094203, "units": "Tflops", "t": 1711995660.8929298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 302.674}}, "t": 1711995659.7822547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 296.348}}, "t": 1711995660.2900438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.392}}, "t": 1711995660.7980075}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.18858234250368, "units": "Tflops", "t": 1711995662.0391443}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.65}}, "t": 1711995661.3045657}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 300.441}}, "t": 1711995661.8131108}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188434637763002, "units": "Tflops", "t": 1711995663.1853254}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 296.443}}, "t": 1711995662.3197038}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 304.318}}, "t": 1711995662.8272161}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188418669819136, "units": "Tflops", "t": 1711995664.3315158}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.795}}, "t": 1711995663.334191}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 304.139}}, "t": 1711995663.843481}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188502501820942, "units": "Tflops", "t": 1711995665.4777312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 291.586}}, "t": 1711995664.3500664}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 305.872}}, "t": 1711995664.8580294}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.763}}, "t": 1711995665.3685558}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188434637763002, "units": "Tflops", "t": 1711995666.623949}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 302.685}}, "t": 1711995665.8762808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.375}}, "t": 1711995666.384268}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188542422079262, "units": "Tflops", "t": 1711995667.7701545}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 302.189}}, "t": 1711995666.8908472}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.713}}, "t": 1711995667.398597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.188450605733454, "units": "Tflops", "t": 1711995668.9163296}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.612}}, "t": 1711995667.9071243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 302.582}}, "t": 1711995668.4149947}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.034047214120115, "units": "Tflops", "t": 1711995670.0718453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.845}}, "t": 1711995668.9226332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.71}}, "t": 1711995669.430276}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.01}}, "t": 1711995669.9383872}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.05277866814153, "units": "Tflops", "t": 1711995671.2262728}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 282.804}}, "t": 1711995670.445033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 302.572}}, "t": 1711995670.9517453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.048965694770295, "units": "Tflops", "t": 1711995672.3808432}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.677}}, "t": 1711995671.4592419}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.633}}, "t": 1711995671.9658482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.090083712849317, "units": "Tflops", "t": 1711995673.5329692}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 310.044}}, "t": 1711995672.4741874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 296.834}}, "t": 1711995672.9808016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 293.623}}, "t": 1711995673.4912107}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.062126710298106, "units": "Tflops", "t": 1711995674.6867874}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 290.922}}, "t": 1711995674.0036547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 311.295}}, "t": 1711995674.5102963}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.044222267760453, "units": "Tflops", "t": 1711995675.8416517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 310.042}}, "t": 1711995675.0181456}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 301.037}}, "t": 1711995675.5260413}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.048332314533614, "units": "Tflops", "t": 1711995676.99627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.695}}, "t": 1711995676.0326235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.441}}, "t": 1711995676.5409636}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.11125779629289, "units": "Tflops", "t": 1711995678.1471264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 307.809}}, "t": 1711995677.0475523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.38}}, "t": 1711995677.5559826}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 306.076}}, "t": 1711995678.0635667}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.05562857351921, "units": "Tflops", "t": 1711995679.301327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 285.041}}, "t": 1711995678.572029}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.88}}, "t": 1711995679.0786793}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.08516182181095, "units": "Tflops", "t": 1711995680.45374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 283.198}}, "t": 1711995679.5864675}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 280.967}}, "t": 1711995680.0930684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.06204791846984, "units": "Tflops", "t": 1711995681.6075172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.397}}, "t": 1711995680.6022115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.855}}, "t": 1711995681.111424}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07703401596999, "units": "Tflops", "t": 1711995682.7604187}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 296.079}}, "t": 1711995681.6219065}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 295.104}}, "t": 1711995682.1334057}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 289.158}}, "t": 1711995682.6439834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.091482530238228, "units": "Tflops", "t": 1711995683.9124534}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 292.175}}, "t": 1711995683.150583}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 296.93}}, "t": 1711995683.6586585}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.07152336483694, "units": "Tflops", "t": 1711995685.065699}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 298.775}}, "t": 1711995684.1652656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 294.913}}, "t": 1711995684.673175}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 19.10034646969462, "units": "Tflops", "t": 1711995686.2172112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 297.234}}, "t": 1711995685.179787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 308.51}}, "t": 1711995685.6873968}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 79, "power": 299.267}}, "t": 1711995686.1951733}, "pipe": "data"} +{"event": "end", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "fp32", "--m", "8192", "--n", "8192", "--number", "10", "--repeat", "90"], "time": 1711995687.146459, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/llama.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/llama.D0.data new file mode 100644 index 000000000..86962e9ef --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/llama.D0.data @@ -0,0 +1,623 @@ +{"event": "config", "data": {"capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "llm", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 800, "name": "llama", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["llama", "D0"], "tags": ["llm", "nlp"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 30}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 92.99, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711995689.849377, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["python", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1711995689.8589938}, "pipe": null} +{"event": "line", "data": "Dataset\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "Tokenizer\n", "pipe": "stderr"} +{"event": "line", "data": "Model\n", "pipe": "stderr"} +{"event": "line", "data": "Pipeline\n", "pipe": "stderr"} +{"event": "line", "data": "Starting", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =10.109107971191406, total / elapsed =202.88634821636788 in_token_count =9 out_token_count =2042\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 202.88634821636788, "units": "Tok/s", "t": 1711995763.3784115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27230.375, 81920.0], "load": 0, "temperature": 50, "power": 68.467}}, "t": 1711995753.3420856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27252.375, 81920.0], "load": 0.11, "temperature": 50, "power": 67.882}}, "t": 1711995753.8482056}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27284.375, 81920.0], "load": 0.95, "temperature": 52, "power": 235.711}}, "t": 1711995754.3575056}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27328.375, 81920.0], "load": 0.95, "temperature": 53, "power": 242.101}}, "t": 1711995754.8666284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27374.375, 81920.0], "load": 0.96, "temperature": 53, "power": 240.959}}, "t": 1711995755.3750021}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27554.375, 81920.0], "load": 0.96, "temperature": 53, "power": 244.27}}, "t": 1711995755.8873162}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27614.375, 81920.0], "load": 0.96, "temperature": 53, "power": 238.073}}, "t": 1711995756.3949978}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27654.375, 81920.0], "load": 0.95, "temperature": 54, "power": 243.117}}, "t": 1711995756.9063902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27714.375, 81920.0], "load": 0.96, "temperature": 54, "power": 240.061}}, "t": 1711995757.4146528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27754.375, 81920.0], "load": 0.97, "temperature": 54, "power": 246.129}}, "t": 1711995757.9223058}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27814.375, 81920.0], "load": 0.95, "temperature": 54, "power": 241.128}}, "t": 1711995758.4307353}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27854.375, 81920.0], "load": 0.95, "temperature": 54, "power": 243.637}}, "t": 1711995758.9403455}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27914.375, 81920.0], "load": 0.97, "temperature": 54, "power": 240.519}}, "t": 1711995759.4479513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27954.375, 81920.0], "load": 0.96, "temperature": 55, "power": 244.318}}, "t": 1711995759.957238}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.96, "temperature": 55, "power": 241.067}}, "t": 1711995760.465651}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28034.375, 81920.0], "load": 0.97, "temperature": 55, "power": 242.464}}, "t": 1711995760.9783478}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28054.375, 81920.0], "load": 0.97, "temperature": 55, "power": 241.679}}, "t": 1711995761.484018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 55, "power": 239.874}}, "t": 1711995761.9964972}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 55, "power": 239.492}}, "t": 1711995762.5089946}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28254.375, 81920.0], "load": 0.97, "temperature": 55, "power": 245.338}}, "t": 1711995763.0166442}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.310582160949707, total / elapsed =370.7691436314918 in_token_count =185 out_token_count =1784\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 370.7691436314918, "units": "Tok/s", "t": 1711995768.6890118}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 58, "power": 198.594}}, "t": 1711995763.5249918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 56, "power": 235.88}}, "t": 1711995764.0339088}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 56, "power": 242.213}}, "t": 1711995764.5422738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 56, "power": 244.949}}, "t": 1711995765.0506353}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 56, "power": 247.478}}, "t": 1711995765.5603323}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 56, "power": 246.908}}, "t": 1711995766.0686421}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 56, "power": 242.634}}, "t": 1711995766.5812082}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 56, "power": 244.93}}, "t": 1711995767.0895922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 56, "power": 242.771}}, "t": 1711995767.6016386}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 57, "power": 243.669}}, "t": 1711995768.1093245}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 57, "power": 247.409}}, "t": 1711995768.618465}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.780025243759155, total / elapsed =306.19355022474383 in_token_count =121 out_token_count =1955\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 306.19355022474383, "units": "Tok/s", "t": 1711995775.4690557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 57, "power": 244.982}}, "t": 1711995769.1268053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 57, "power": 240.328}}, "t": 1711995769.6369355}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 57, "power": 241.738}}, "t": 1711995770.1452866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 57, "power": 249.549}}, "t": 1711995770.6545281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 57, "power": 235.778}}, "t": 1711995771.1628747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 57, "power": 240.674}}, "t": 1711995771.6748383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 57, "power": 235.325}}, "t": 1711995772.1825204}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 57, "power": 249.733}}, "t": 1711995772.6953647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 58, "power": 248.56}}, "t": 1711995773.2037067}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 58, "power": 244.87}}, "t": 1711995773.7150156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 58, "power": 242.025}}, "t": 1711995774.223783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 58, "power": 238.018}}, "t": 1711995774.7337456}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 58, "power": 248.098}}, "t": 1711995775.2421043}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.650259017944336, total / elapsed =315.02532372755405 in_token_count =127 out_token_count =1968\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 315.02532372755405, "units": "Tok/s", "t": 1711995782.1193342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 58, "power": 243.878}}, "t": 1711995775.752672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 58, "power": 244.7}}, "t": 1711995776.2605562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 58, "power": 245.852}}, "t": 1711995776.7729142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 58, "power": 249.822}}, "t": 1711995777.2811427}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 59, "power": 249.749}}, "t": 1711995777.7937183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 59, "power": 251.41}}, "t": 1711995778.3050504}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 59, "power": 248.888}}, "t": 1711995778.814262}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 59, "power": 249.762}}, "t": 1711995779.323552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 59, "power": 248.844}}, "t": 1711995779.8333223}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 59, "power": 249.963}}, "t": 1711995780.3432696}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 59, "power": 247.89}}, "t": 1711995780.8525734}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 59, "power": 246.993}}, "t": 1711995781.362119}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 60, "power": 244.913}}, "t": 1711995781.8715672}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.387208938598633, total / elapsed =217.8496306384853 in_token_count =6 out_token_count =2039\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 217.8496306384853, "units": "Tok/s", "t": 1711995791.5065625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.94, "temperature": 60, "power": 241.495}}, "t": 1711995782.3812447}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 60, "power": 251.413}}, "t": 1711995782.8923182}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 60, "power": 249.758}}, "t": 1711995783.4050295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 60, "power": 243.789}}, "t": 1711995783.9172273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 60, "power": 247.047}}, "t": 1711995784.4256277}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 60, "power": 250.822}}, "t": 1711995784.9370372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 60, "power": 242.064}}, "t": 1711995785.4453714}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 60, "power": 250.806}}, "t": 1711995785.9565747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 61, "power": 251.229}}, "t": 1711995786.4649174}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 61, "power": 254.623}}, "t": 1711995786.9755528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.95, "temperature": 61, "power": 246.748}}, "t": 1711995787.4830816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 61, "power": 247.09}}, "t": 1711995787.9954042}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 61, "power": 252.197}}, "t": 1711995788.5037389}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 61, "power": 252.59}}, "t": 1711995789.0150747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 61, "power": 251.399}}, "t": 1711995789.5225408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 61, "power": 248.779}}, "t": 1711995790.0322585}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.97, "temperature": 61, "power": 249.546}}, "t": 1711995790.541665}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 62, "power": 244.921}}, "t": 1711995791.0508416}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.672929525375366, total / elapsed =594.8929825373377 in_token_count =256 out_token_count =1929\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 594.8929825373377, "units": "Tok/s", "t": 1711995795.1795084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28278.375, 81920.0], "load": 0.96, "temperature": 63, "power": 255.127}}, "t": 1711995791.5576684}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 62, "power": 250.443}}, "t": 1711995792.069152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 62, "power": 249.964}}, "t": 1711995792.5785825}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 62, "power": 252.761}}, "t": 1711995793.0881119}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 62, "power": 253.262}}, "t": 1711995793.5975835}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.96, "temperature": 62, "power": 253.726}}, "t": 1711995794.1069517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 62, "power": 251.186}}, "t": 1711995794.616411}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28310.375, 81920.0], "load": 0.97, "temperature": 62, "power": 257.604}}, "t": 1711995795.1258776}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.756767749786377, total / elapsed =1292.1457604603866 in_token_count =340 out_token_count =1930\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1292.1457604603866, "units": "Tok/s", "t": 1711995796.9362948}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 1.0, "temperature": 63, "power": 239.774}}, "t": 1711995795.6347349}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 63, "power": 244.278}}, "t": 1711995796.147579}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 63, "power": 257.289}}, "t": 1711995796.657504}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.401246070861816, total / elapsed =277.9261735531621 in_token_count =95 out_token_count =1962\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 277.9261735531621, "units": "Tok/s", "t": 1711995804.3375626}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 63, "power": 224.78}}, "t": 1711995797.1685412}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 63, "power": 255.23}}, "t": 1711995797.6791804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 63, "power": 255.527}}, "t": 1711995798.1875448}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 63, "power": 247.025}}, "t": 1711995798.6967278}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 63, "power": 254.634}}, "t": 1711995799.2043164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 63, "power": 254.657}}, "t": 1711995799.7138445}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 63, "power": 255.258}}, "t": 1711995800.2213385}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 256.636}}, "t": 1711995800.7338789}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 254.545}}, "t": 1711995801.242266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 254.64}}, "t": 1711995801.7522864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 254.14}}, "t": 1711995802.2599337}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 248.317}}, "t": 1711995802.771011}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 252.265}}, "t": 1711995803.2793713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 64, "power": 254.166}}, "t": 1711995803.7901602}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 64, "power": 258.079}}, "t": 1711995804.2984781}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.411189794540405, total / elapsed =216.12570189371365 in_token_count =5 out_token_count =2029\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 216.12570189371365, "units": "Tok/s", "t": 1711995813.7487748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 257.698}}, "t": 1711995804.808639}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 64, "power": 248.91}}, "t": 1711995805.3170307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 64, "power": 258.366}}, "t": 1711995805.8265495}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 65, "power": 257.289}}, "t": 1711995806.3350284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 249.351}}, "t": 1711995806.8466814}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 65, "power": 259.155}}, "t": 1711995807.3550267}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 252.212}}, "t": 1711995807.8639557}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 257.978}}, "t": 1711995808.3722787}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 262.04}}, "t": 1711995808.883023}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 253.078}}, "t": 1711995809.3913317}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 254.722}}, "t": 1711995809.9012816}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 252.418}}, "t": 1711995810.4096897}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 257.353}}, "t": 1711995810.9224513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 65, "power": 253.387}}, "t": 1711995811.430784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 65, "power": 250.538}}, "t": 1711995811.9434276}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 66, "power": 251.222}}, "t": 1711995812.4517844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 66, "power": 261.299}}, "t": 1711995812.9615831}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 66, "power": 259.352}}, "t": 1711995813.4699054}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.7439916133880615, total / elapsed =547.8110561641944 in_token_count =253 out_token_count =1798\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 547.8110561641944, "units": "Tok/s", "t": 1711995817.4927838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 69, "power": 296.737}}, "t": 1711995813.9765341}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 66, "power": 253.091}}, "t": 1711995814.485589}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 66, "power": 262.086}}, "t": 1711995814.99396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 66, "power": 254.942}}, "t": 1711995815.5024142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 66, "power": 256.199}}, "t": 1711995816.0129704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 66, "power": 259.566}}, "t": 1711995816.5216396}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 66, "power": 259.161}}, "t": 1711995817.033413}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/transformers/pipelines/base.py:1101: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.1102468967437744, total / elapsed =668.4356801952209 in_token_count =282 out_token_count =1797\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 668.4356801952209, "units": "Tok/s", "t": 1711995820.603053}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 66, "power": 257.289}}, "t": 1711995817.5475976}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.99, "temperature": 67, "power": 244.092}}, "t": 1711995818.0573807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 67, "power": 255.439}}, "t": 1711995818.569283}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 67, "power": 254.948}}, "t": 1711995819.076932}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 67, "power": 250.857}}, "t": 1711995819.58729}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 67, "power": 261.122}}, "t": 1711995820.0948906}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 67, "power": 261.114}}, "t": 1711995820.6032345}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.6777124404907227, total / elapsed =559.5869805757293 in_token_count =256 out_token_count =1802\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 559.5869805757293, "units": "Tok/s", "t": 1711995824.2807822}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 67, "power": 253.238}}, "t": 1711995821.1116257}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 67, "power": 260.731}}, "t": 1711995821.6218107}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 67, "power": 265.503}}, "t": 1711995822.1292942}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 67, "power": 260.412}}, "t": 1711995822.639279}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 67, "power": 260.538}}, "t": 1711995823.1476498}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 67, "power": 260.155}}, "t": 1711995823.6631866}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 67, "power": 256.503}}, "t": 1711995824.171592}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.413808345794678, total / elapsed =206.5051601425921 in_token_count =5 out_token_count =1939\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 206.5051601425921, "units": "Tok/s", "t": 1711995833.6946108}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.94, "temperature": 67, "power": 253.739}}, "t": 1711995824.6819916}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 67, "power": 260.445}}, "t": 1711995825.1935067}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 67, "power": 254.573}}, "t": 1711995825.703284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 259.77}}, "t": 1711995826.2132263}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 254.152}}, "t": 1711995826.7221627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.95, "temperature": 68, "power": 257.395}}, "t": 1711995827.2319245}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 261.116}}, "t": 1711995827.7421737}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 68, "power": 252.135}}, "t": 1711995828.2516592}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 254.771}}, "t": 1711995828.7609808}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 260.426}}, "t": 1711995829.270541}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 264.239}}, "t": 1711995829.78172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 263.369}}, "t": 1711995830.2933195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 266.278}}, "t": 1711995830.8043246}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 261.721}}, "t": 1711995831.3126965}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 68, "power": 262.092}}, "t": 1711995831.824284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 262.483}}, "t": 1711995832.3325846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.97, "temperature": 68, "power": 261.998}}, "t": 1711995832.8431509}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28352.375, 81920.0], "load": 0.96, "temperature": 68, "power": 258.696}}, "t": 1711995833.3516283}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.541945457458496, total / elapsed =1352.8364378323986 in_token_count =349 out_token_count =1737\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1352.8364378323986, "units": "Tok/s", "t": 1711995835.2365723}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.94, "temperature": 71, "power": 301.309}}, "t": 1711995833.8594763}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 263.589}}, "t": 1711995834.3682933}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 69, "power": 267.878}}, "t": 1711995834.8769279}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =2.9965567588806152, total / elapsed =759.8721410004559 in_token_count =287 out_token_count =1990\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 759.8721410004559, "units": "Tok/s", "t": 1711995838.2331505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 71, "power": 299.098}}, "t": 1711995835.3899295}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 267.199}}, "t": 1711995835.9023864}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 263.867}}, "t": 1711995836.4106908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.026}}, "t": 1711995836.922704}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 69, "power": 256.943}}, "t": 1711995837.4302807}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 267.652}}, "t": 1711995837.9418368}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.370404958724976, total / elapsed =207.99527966880945 in_token_count =7 out_token_count =1942\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 207.99527966880945, "units": "Tok/s", "t": 1711995847.6035771}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.95, "temperature": 69, "power": 277.233}}, "t": 1711995838.4500961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 263.945}}, "t": 1711995838.960809}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.238}}, "t": 1711995839.4691484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 69, "power": 262.77}}, "t": 1711995839.9800358}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 69, "power": 265.522}}, "t": 1711995840.4876213}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.95, "temperature": 69, "power": 263.265}}, "t": 1711995841.000395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 69, "power": 257.223}}, "t": 1711995841.5088184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.94, "temperature": 69, "power": 254.971}}, "t": 1711995842.0198576}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.93, "temperature": 69, "power": 259.663}}, "t": 1711995842.5313172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 260.753}}, "t": 1711995843.0410423}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 70, "power": 262.011}}, "t": 1711995843.551031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 265.214}}, "t": 1711995844.0585783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 264.274}}, "t": 1711995844.568422}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 266.018}}, "t": 1711995845.0778441}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.97, "temperature": 70, "power": 265.532}}, "t": 1711995845.587466}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 265.219}}, "t": 1711995846.096065}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 260.514}}, "t": 1711995846.606915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 268.452}}, "t": 1711995847.114393}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.2052733898162842, total / elapsed =1812.8667059786762 in_token_count =363 out_token_count =1822\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1812.8667059786762, "units": "Tok/s", "t": 1711995848.808867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28396.375, 81920.0], "load": 0.96, "temperature": 70, "power": 265.992}}, "t": 1711995847.6250541}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.99, "temperature": 70, "power": 259.136}}, "t": 1711995848.1349144}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 262.209}}, "t": 1711995848.6459308}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.368046760559082, total / elapsed =215.41310067922487 in_token_count =7 out_token_count =2011\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 215.41310067922487, "units": "Tok/s", "t": 1711995858.1769333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.93, "temperature": 70, "power": 263.664}}, "t": 1711995849.1536856}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 257.603}}, "t": 1711995849.6647618}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 70, "power": 260.068}}, "t": 1711995850.173594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 70, "power": 263.985}}, "t": 1711995850.6834497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 262.974}}, "t": 1711995851.191828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 268.827}}, "t": 1711995851.701436}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 260.693}}, "t": 1711995852.2097962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 260.037}}, "t": 1711995852.7230153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 262.357}}, "t": 1711995853.2308667}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 70, "power": 267.458}}, "t": 1711995853.7422597}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 269.444}}, "t": 1711995854.251029}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 70, "power": 269.349}}, "t": 1711995854.7609913}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 269.045}}, "t": 1711995855.269318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 270.007}}, "t": 1711995855.7806041}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 269.926}}, "t": 1711995856.28887}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 267.08}}, "t": 1711995856.7995515}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 265.612}}, "t": 1711995857.3073075}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 266.106}}, "t": 1711995857.8202367}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.66013503074646, total / elapsed =1242.06764016831 in_token_count =344 out_token_count =1718\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1242.06764016831, "units": "Tok/s", "t": 1711995859.8370876}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 287.693}}, "t": 1711995858.3361492}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 262.89}}, "t": 1711995858.8450902}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 265.611}}, "t": 1711995859.3539722}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.753327131271362, total / elapsed =306.367507420079 in_token_count =122 out_token_count =1947\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 306.367507420079, "units": "Tok/s", "t": 1711995866.5904334}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 264.354}}, "t": 1711995859.8639908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 261.815}}, "t": 1711995860.3748593}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 264.956}}, "t": 1711995860.8871183}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 267.776}}, "t": 1711995861.39545}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 269.554}}, "t": 1711995861.9062386}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 255.99}}, "t": 1711995862.414722}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 264.256}}, "t": 1711995862.9232707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 262.394}}, "t": 1711995863.4323113}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 261.558}}, "t": 1711995863.9418385}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 260.352}}, "t": 1711995864.4512548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 273.888}}, "t": 1711995864.963804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 269.344}}, "t": 1711995865.47215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 270.201}}, "t": 1711995865.9849715}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 260.85}}, "t": 1711995866.4925995}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.412825584411621, total / elapsed =216.83180907761175 in_token_count =6 out_token_count =2035\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 216.83180907761175, "units": "Tok/s", "t": 1711995876.0032785}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 71, "power": 264.249}}, "t": 1711995867.0037327}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 71, "power": 255.83}}, "t": 1711995867.5112624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 71, "power": 261.234}}, "t": 1711995868.0220654}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 267.685}}, "t": 1711995868.5304642}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 265.638}}, "t": 1711995869.0403736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 264.049}}, "t": 1711995869.548755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 269.92}}, "t": 1711995870.0611727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 263.744}}, "t": 1711995870.5688138}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 71, "power": 265.13}}, "t": 1711995871.0822456}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 264.258}}, "t": 1711995871.5901036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 267.69}}, "t": 1711995872.1001284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 71, "power": 263.857}}, "t": 1711995872.6085153}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.402}}, "t": 1711995873.1187687}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 71, "power": 264.6}}, "t": 1711995873.6264288}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.394}}, "t": 1711995874.13531}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 261.769}}, "t": 1711995874.6460292}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 267.768}}, "t": 1711995875.1535838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 265.332}}, "t": 1711995875.6657023}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.459133863449097, total / elapsed =272.14956014495357 in_token_count =91 out_token_count =1939\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 272.14956014495357, "units": "Tok/s", "t": 1711995883.4624333}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 285.722}}, "t": 1711995876.1828914}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 263.492}}, "t": 1711995876.6957128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 267.987}}, "t": 1711995877.2040458}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 72, "power": 269.919}}, "t": 1711995877.7144463}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 263.471}}, "t": 1711995878.222752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 256.835}}, "t": 1711995878.7329645}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.021}}, "t": 1711995879.2413342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 264.544}}, "t": 1711995879.751992}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.66}}, "t": 1711995880.2595484}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 268.448}}, "t": 1711995880.7690644}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 267.291}}, "t": 1711995881.2773538}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 264.851}}, "t": 1711995881.790882}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 261.354}}, "t": 1711995882.2989507}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 258.252}}, "t": 1711995882.8081105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.416}}, "t": 1711995883.3157184}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.859767436981201, total / elapsed =343.52899183265964 in_token_count =162 out_token_count =1851\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 343.52899183265964, "units": "Tok/s", "t": 1711995889.3222222}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 256.424}}, "t": 1711995883.8240266}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.988}}, "t": 1711995884.3334}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.916}}, "t": 1711995884.8418002}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.912}}, "t": 1711995885.3508658}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 261.332}}, "t": 1711995885.8631506}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 264.519}}, "t": 1711995886.3737717}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 259.68}}, "t": 1711995886.8866205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.002}}, "t": 1711995887.3949335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 270.327}}, "t": 1711995887.9063468}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 267.394}}, "t": 1711995888.414635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 268.234}}, "t": 1711995888.9255524}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =5.291563272476196, total / elapsed =389.2989451179744 in_token_count =186 out_token_count =1874\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 389.2989451179744, "units": "Tok/s", "t": 1711995894.613804}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 74, "power": 333.163}}, "t": 1711995889.432598}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 269.829}}, "t": 1711995889.9421093}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 260.066}}, "t": 1711995890.4497879}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 264.079}}, "t": 1711995890.9622707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 263.691}}, "t": 1711995891.4708872}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.323}}, "t": 1711995891.9839392}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 264.31}}, "t": 1711995892.4921086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.261}}, "t": 1711995893.003235}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.216}}, "t": 1711995893.5115745}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 269.225}}, "t": 1711995894.0225089}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 264.372}}, "t": 1711995894.5308867}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.903409719467163, total / elapsed =292.31931494799915 in_token_count =117 out_token_count =1901\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 292.31931494799915, "units": "Tok/s", "t": 1711995901.5172331}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.98, "temperature": 72, "power": 264.931}}, "t": 1711995895.0409033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.0}}, "t": 1711995895.5492537}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.112}}, "t": 1711995896.0604632}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.93, "temperature": 72, "power": 267.774}}, "t": 1711995896.5679538}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.829}}, "t": 1711995897.0798528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.271}}, "t": 1711995897.5882719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 266.686}}, "t": 1711995898.0979738}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 72, "power": 260.771}}, "t": 1711995898.6063092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 270.134}}, "t": 1711995899.1164627}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 273.722}}, "t": 1711995899.6250527}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 272.466}}, "t": 1711995900.1338618}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 271.286}}, "t": 1711995900.643179}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 269.123}}, "t": 1711995901.1519349}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.378474950790405, total / elapsed =217.94588253687604 in_token_count =6 out_token_count =2038\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 217.94588253687604, "units": "Tok/s", "t": 1711995910.895728}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 268.561}}, "t": 1711995901.6650813}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 257.822}}, "t": 1711995902.1734676}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.095}}, "t": 1711995902.6862142}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.671}}, "t": 1711995903.1944606}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.56}}, "t": 1711995903.7037315}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 263.989}}, "t": 1711995904.2120388}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 268.742}}, "t": 1711995904.7230499}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.991}}, "t": 1711995905.2313707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 257.441}}, "t": 1711995905.741889}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 262.796}}, "t": 1711995906.249867}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 267.162}}, "t": 1711995906.7616074}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 268.089}}, "t": 1711995907.2700152}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 266.329}}, "t": 1711995907.7828755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 265.117}}, "t": 1711995908.2903752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 265.327}}, "t": 1711995908.8017204}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 261.589}}, "t": 1711995909.3101392}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.391}}, "t": 1711995909.8215888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 272.052}}, "t": 1711995910.329911}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 271.58}}, "t": 1711995910.840956}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.449252605438232, total / elapsed =272.64480177746884 in_token_count =91 out_token_count =1940\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 272.64480177746884, "units": "Tok/s", "t": 1711995918.345003}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.493}}, "t": 1711995911.3485577}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 261.353}}, "t": 1711995911.8592043}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 261.909}}, "t": 1711995912.3675594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 271.062}}, "t": 1711995912.878701}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 271.979}}, "t": 1711995913.3871117}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 261.65}}, "t": 1711995913.8981009}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 266.88}}, "t": 1711995914.4068174}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 263.474}}, "t": 1711995914.9177406}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 263.848}}, "t": 1711995915.4256105}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 261.719}}, "t": 1711995915.9354112}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 268.812}}, "t": 1711995916.44374}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 266.899}}, "t": 1711995916.9588563}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 258.821}}, "t": 1711995917.4672928}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.704}}, "t": 1711995917.978324}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.342105150222778, total / elapsed =218.0450730584449 in_token_count =9 out_token_count =2028\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 218.0450730584449, "units": "Tok/s", "t": 1711995927.68713}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 267.775}}, "t": 1711995918.4882834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 72, "power": 261.33}}, "t": 1711995918.9959102}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 267.374}}, "t": 1711995919.5037003}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 72, "power": 257.833}}, "t": 1711995920.0134525}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 72, "power": 271.291}}, "t": 1711995920.5227695}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 264.343}}, "t": 1711995921.0313554}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.881}}, "t": 1711995921.5398915}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.983}}, "t": 1711995922.0518546}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 72, "power": 264.342}}, "t": 1711995922.5596373}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.484}}, "t": 1711995923.072071}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 269.528}}, "t": 1711995923.5803893}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 271.863}}, "t": 1711995924.0894318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.581}}, "t": 1711995924.5977783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.795}}, "t": 1711995925.1079035}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 266.805}}, "t": 1711995925.6163828}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 268.138}}, "t": 1711995926.1266806}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 260.068}}, "t": 1711995926.634961}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 272.76}}, "t": 1711995927.1445243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 267.176}}, "t": 1711995927.6528864}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.32261061668396, total / elapsed =579.9656421742218 in_token_count =273 out_token_count =1654\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 579.9656421742218, "units": "Tok/s", "t": 1711995931.0097609}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.99, "temperature": 73, "power": 258.192}}, "t": 1711995928.1617858}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 269.335}}, "t": 1711995928.6725073}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 268.165}}, "t": 1711995929.1809025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 267.61}}, "t": 1711995929.6914873}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 265.111}}, "t": 1711995930.199847}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 265.993}}, "t": 1711995930.712197}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.416344165802002, total / elapsed =617.9119835558 in_token_count =269 out_token_count =1842\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 617.9119835558, "units": "Tok/s", "t": 1711995934.426122}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 75, "power": 304.109}}, "t": 1711995931.2190888}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 260.689}}, "t": 1711995931.726754}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 264.637}}, "t": 1711995932.2401698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.084}}, "t": 1711995932.750056}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 270.792}}, "t": 1711995933.2600818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 269.226}}, "t": 1711995933.7698271}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 264.257}}, "t": 1711995934.2786996}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.701732397079468, total / elapsed =375.81892178669955 in_token_count =213 out_token_count =1554\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 375.81892178669955, "units": "Tok/s", "t": 1711995939.1278727}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 1.0, "temperature": 73, "power": 252.006}}, "t": 1711995934.790233}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 270.737}}, "t": 1711995935.2992086}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 272.377}}, "t": 1711995935.809733}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 272.259}}, "t": 1711995936.3180962}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 273.741}}, "t": 1711995936.8326285}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 273.436}}, "t": 1711995937.3403041}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 272.944}}, "t": 1711995937.853184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 268.272}}, "t": 1711995938.3614848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 262.491}}, "t": 1711995938.8713672}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.27197790145874, total / elapsed =218.72355839857417 in_token_count =11 out_token_count =2017\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 218.72355839857417, "units": "Tok/s", "t": 1711995948.3998723}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 253.291}}, "t": 1711995939.3803365}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 273.058}}, "t": 1711995939.8901513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 267.499}}, "t": 1711995940.3984575}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 265.903}}, "t": 1711995940.9077015}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.37}}, "t": 1711995941.4159985}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 259.775}}, "t": 1711995941.9276743}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.379}}, "t": 1711995942.4359782}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.6}}, "t": 1711995942.9485471}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.026}}, "t": 1711995943.4568248}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 262.833}}, "t": 1711995943.9647472}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 265.436}}, "t": 1711995944.4730854}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 268.732}}, "t": 1711995944.9831939}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 265.244}}, "t": 1711995945.4908755}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 267.305}}, "t": 1711995946.0014796}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 267.704}}, "t": 1711995946.509034}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.983}}, "t": 1711995947.0204468}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 265.523}}, "t": 1711995947.5281298}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 259.412}}, "t": 1711995948.0394344}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =6.1725013256073, total / elapsed =313.32516559803537 in_token_count =148 out_token_count =1786\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 313.32516559803537, "units": "Tok/s", "t": 1711995954.5723922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 75, "power": 270.878}}, "t": 1711995948.546669}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 258.406}}, "t": 1711995949.0550468}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 267.311}}, "t": 1711995949.5644336}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.131}}, "t": 1711995950.074155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.086}}, "t": 1711995950.5833018}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 270.42}}, "t": 1711995951.0927382}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 272.962}}, "t": 1711995951.602194}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 272.623}}, "t": 1711995952.1104019}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 272.856}}, "t": 1711995952.6182473}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 271.693}}, "t": 1711995953.129434}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 270.903}}, "t": 1711995953.64231}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 267.201}}, "t": 1711995954.1552837}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.5126137733459473, total / elapsed =606.8964967635832 in_token_count =110 out_token_count =808\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 606.8964967635832, "units": "Tok/s", "t": 1711995956.085022}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 75, "power": 262.974}}, "t": 1711995954.6632996}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.399}}, "t": 1711995955.1733942}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 269.518}}, "t": 1711995955.6817145}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.412853002548218, total / elapsed =218.42474321477306 in_token_count =6 out_token_count =2050\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 218.42474321477306, "units": "Tok/s", "t": 1711995965.4978974}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 271.391}}, "t": 1711995956.1903346}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.92, "temperature": 73, "power": 265.447}}, "t": 1711995956.6995513}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 262.225}}, "t": 1711995957.2083952}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.59}}, "t": 1711995957.717679}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.93, "temperature": 73, "power": 259.767}}, "t": 1711995958.226757}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.19}}, "t": 1711995958.7393398}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 272.962}}, "t": 1711995959.2473838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 262.406}}, "t": 1711995959.7587516}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 264.441}}, "t": 1711995960.2663918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 269.15}}, "t": 1711995960.777642}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 269.031}}, "t": 1711995961.2859128}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 272.167}}, "t": 1711995961.796672}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 272.285}}, "t": 1711995962.3050332}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 260.562}}, "t": 1711995962.8155868}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 272.271}}, "t": 1711995963.3232286}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 272.635}}, "t": 1711995963.8353984}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 271.383}}, "t": 1711995964.34379}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 268.973}}, "t": 1711995964.8568056}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.1}}, "t": 1711995965.3651621}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =3.2096517086029053, total / elapsed =629.663335303038 in_token_count =278 out_token_count =1743\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 629.663335303038, "units": "Tok/s", "t": 1711995968.7075694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.99, "temperature": 74, "power": 300.563}}, "t": 1711995965.8755505}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 259.983}}, "t": 1711995966.3847017}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 265.43}}, "t": 1711995966.8936372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 272.863}}, "t": 1711995967.4033568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 270.308}}, "t": 1711995967.9128218}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 263.934}}, "t": 1711995968.4217443}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.41004204750061, total / elapsed =205.84392611874506 in_token_count =6 out_token_count =1931\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 205.84392611874506, "units": "Tok/s", "t": 1711995978.1176322}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 269.124}}, "t": 1711995968.9301698}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 258.858}}, "t": 1711995969.439453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 267.178}}, "t": 1711995969.951934}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 264.137}}, "t": 1711995970.4595497}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 268.966}}, "t": 1711995970.9720008}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 270.096}}, "t": 1711995971.4803479}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 266.221}}, "t": 1711995971.990911}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.392}}, "t": 1711995972.4992418}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 259.072}}, "t": 1711995973.010103}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.94, "temperature": 73, "power": 264.578}}, "t": 1711995973.5184624}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.409}}, "t": 1711995974.028401}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 274.102}}, "t": 1711995974.536784}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 260.484}}, "t": 1711995975.0492752}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.796}}, "t": 1711995975.557762}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.054}}, "t": 1711995976.0718966}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.636}}, "t": 1711995976.583307}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 268.284}}, "t": 1711995977.0935092}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 268.241}}, "t": 1711995977.6034033}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 262.085}}, "t": 1711995978.1131136}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =1.6592671871185303, total / elapsed =1331.310603349019 in_token_count =344 out_token_count =1865\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 1331.310603349019, "units": "Tok/s", "t": 1711995979.776917}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.99, "temperature": 74, "power": 260.109}}, "t": 1711995978.6213264}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 267.52}}, "t": 1711995979.1321614}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 269.062}}, "t": 1711995979.6406126}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =7.146080493927002, total / elapsed =281.5529438424141 in_token_count =105 out_token_count =1907\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 281.5529438424141, "units": "Tok/s", "t": 1711995986.9230163}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 74, "power": 269.538}}, "t": 1711995980.1499383}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 265.039}}, "t": 1711995980.6577647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 267.006}}, "t": 1711995981.1700819}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 271.009}}, "t": 1711995981.678049}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 260.15}}, "t": 1711995982.191586}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 264.184}}, "t": 1711995982.699408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 263.184}}, "t": 1711995983.2092853}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 266.093}}, "t": 1711995983.7187772}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.866}}, "t": 1711995984.2279284}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 266.508}}, "t": 1711995984.739031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 264.739}}, "t": 1711995985.2473853}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 268.626}}, "t": 1711995985.757871}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 273.935}}, "t": 1711995986.2660372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 273.708}}, "t": 1711995986.7778711}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =9.135133266448975, total / elapsed =223.20418767055423 in_token_count =17 out_token_count =2022\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 223.20418767055423, "units": "Tok/s", "t": 1711995996.058172}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.93, "temperature": 73, "power": 272.749}}, "t": 1711995987.2861552}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 268.914}}, "t": 1711995987.7989047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 268.045}}, "t": 1711995988.3099682}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 259.366}}, "t": 1711995988.8199148}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 267.099}}, "t": 1711995989.3299372}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 272.661}}, "t": 1711995989.8389668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 263.879}}, "t": 1711995990.3487735}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 270.023}}, "t": 1711995990.8573248}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 73, "power": 270.502}}, "t": 1711995991.3667521}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 272.748}}, "t": 1711995991.8758647}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 274.012}}, "t": 1711995992.3852034}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 273.83}}, "t": 1711995992.8964036}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 73, "power": 273.563}}, "t": 1711995993.4052114}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 272.28}}, "t": 1711995993.9128149}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 272.162}}, "t": 1711995994.4227448}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 271.595}}, "t": 1711995994.932359}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.97, "temperature": 74, "power": 271.388}}, "t": 1711995995.4421184}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 74, "power": 267.169}}, "t": 1711995995.951464}, "pipe": "data"} +{"event": "line", "data": "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n", "pipe": "stderr"} +{"event": "line", "data": "elapsed =4.645482778549194, total / elapsed =449.4688925856038 in_token_count =216 out_token_count =1872\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "rate": 449.4688925856038, "units": "Tok/s", "t": 1711996000.7036726}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.98, "temperature": 74, "power": 253.504}}, "t": 1711995996.4592528}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.95, "temperature": 74, "power": 271.489}}, "t": 1711995996.9700568}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 74, "power": 271.991}}, "t": 1711995997.4783242}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 74, "power": 271.59}}, "t": 1711995997.9901145}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 74, "power": 267.888}}, "t": 1711995998.4978797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 73, "power": 260.176}}, "t": 1711995999.0084395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 74, "power": 274.412}}, "t": 1711995999.5160155}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 74, "power": 270.125}}, "t": 1711996000.0266666}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28442.375, 81920.0], "load": 0.96, "temperature": 74, "power": 272.07}}, "t": 1711996000.5347307}, "pipe": "data"} +{"event": "end", "data": {"command": ["python", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/llama/main.py", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1711996001.8250673, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b-multinode.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b-multinode.data new file mode 100644 index 000000000..3b45f0015 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-1_3b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b.local.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b.local.data new file mode 100644 index 000000000..456009360 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b.local.data @@ -0,0 +1,14 @@ +{"event": "config", "data": {"argv": {"--cpus_per_gpu": 8, "--dataset_config_name": "wikitext-103-v1", "--dataset_name": "wikitext", "--dataset_rev": "b08601e", "--max_train_steps": 100, "--model_name": "facebook/opt-1.3b", "--per_gpu_batch_size": 1, "--validation_split_percentage": 5}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/opt", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "gradient_accumulation_steps": 1, "group": "opt", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "opt-1_3b", "num_machines": 1, "plan": {"method": "njobs", "n": 1}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["opt-1_3b", "local"], "tags": ["huggingface", "language-modeling", "llm", "multigpu", "nlp", "transformer"], "use_deepspeed": false, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 5.0}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.07, "memory": 0.010771942138671876}, "temperature": 70, "power": 90.516, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996004.437195, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "accelerate", "launch", "--mixed_precision=fp16", "--dynamo_backend=no", "--machine_rank=0", "--num_machines=1", "--multi_gpu", "--gradient_accumulation_steps=1", "--num_cpu_threads_per_process=8", "--main_process_ip=127.0.0.1", "--main_process_port=8123", "--num_processes=1", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/main.py", "--cpus_per_gpu", "8", "--dataset_config_name", "wikitext-103-v1", "--dataset_name", "wikitext", "--dataset_rev", "b08601e", "--max_train_steps", "100", "--model_name", "facebook/opt-1.3b", "--per_gpu_batch_size", "1", "--validation_split_percentage", "5", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1711996004.4543693}, "pipe": null} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/accelerate\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py\", line 47, in main\n", "pipe": "stderr"} +{"event": "line", "data": " args.func(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/commands/launch.py\", line 972, in launch_command\n", "pipe": "stderr"} +{"event": "line", "data": " args, defaults, mp_from_config_flag = _validate_launch_command(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/commands/launch.py\", line 822, in _validate_launch_command\n", "pipe": "stderr"} +{"event": "line", "data": " raise ValueError(\"You need to use at least 2 processes to use `--multi_gpu`.\")\n", "pipe": "stderr"} +{"event": "line", "data": "ValueError: You need to use at least 2 processes to use `--multi_gpu`.\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "accelerate", "launch", "--mixed_precision=fp16", "--dynamo_backend=no", "--machine_rank=0", "--num_machines=1", "--multi_gpu", "--gradient_accumulation_steps=1", "--num_cpu_threads_per_process=8", "--main_process_ip=127.0.0.1", "--main_process_port=8123", "--num_processes=1", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/main.py", "--cpus_per_gpu", "8", "--dataset_config_name", "wikitext-103-v1", "--dataset_name", "wikitext", "--dataset_rev", "b08601e", "--max_train_steps", "100", "--model_name", "facebook/opt-1.3b", "--per_gpu_batch_size", "1", "--validation_split_percentage", "5", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1711996006.498313, "return_code": 1}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b-multinode.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b-multinode.data new file mode 100644 index 000000000..cccd5c098 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b-multinode.data @@ -0,0 +1 @@ +{"event": "message", "data": {"message": "Skip opt-6_7b-multinode because the following capability is not satisfied: len(nodes) >= 2"}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b.local.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b.local.data new file mode 100644 index 000000000..ee26409e2 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b.local.data @@ -0,0 +1,291 @@ +{"event": "config", "data": {"argv": {"--cpus_per_gpu": 8, "--dataset_config_name": "wikitext-103-v1", "--dataset_name": "wikitext", "--dataset_rev": "b08601e", "--max_train_steps": 100, "--model_name": "facebook/opt-6.7b", "--per_gpu_batch_size": 1, "--validation_split_percentage": 5}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/opt", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "gradient_accumulation_steps": 1, "group": "opt", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "opt-6_7b", "num_machines": 1, "plan": {"method": "njobs", "n": 1}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["opt-6_7b", "local"], "tags": ["huggingface", "language-modeling", "llm", "multigpu", "nlp", "transformer"], "use_deepspeed": true, "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 5.0}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 67, "power": 54.174, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996009.148776, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "accelerate", "launch", "--mixed_precision=fp16", "--dynamo_backend=no", "--machine_rank=0", "--num_machines=1", "--use_deepspeed", "--deepspeed_multinode_launcher=standard", "--zero_stage=2", "--gradient_accumulation_steps=1", "--num_cpu_threads_per_process=8", "--main_process_ip=127.0.0.1", "--main_process_port=8123", "--num_processes=1", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/main.py", "--cpus_per_gpu", "8", "--dataset_config_name", "wikitext-103-v1", "--dataset_name", "wikitext", "--dataset_rev", "b08601e", "--max_train_steps", "100", "--model_name", "facebook/opt-6.7b", "--per_gpu_batch_size", "1", "--validation_split_percentage", "5", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1711996009.1659405}, "pipe": null} +{"event": "line", "data": "[2024-04-01 18:26:53,945] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:26:54,257] [INFO] [comm.py:637:init_distributed] cdb=None\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:26:54,257] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 64}}}, "pipe": "data"} +{"event": "line", "data": "[04/01/24 18:26:54] INFO [0/1] __main__ - Distributed logging.py:60\n", "pipe": "stdout"} +{"event": "line", "data": " environment: DEEPSPEED Backend: nccl \n", "pipe": "stdout"} +{"event": "line", "data": " Num processes: 1 \n", "pipe": "stdout"} +{"event": "line", "data": " Process index: 0 \n", "pipe": "stdout"} +{"event": "line", "data": " Local process index: 0 \n", "pipe": "stdout"} +{"event": "line", "data": " Device: cuda:0 \n", "pipe": "stdout"} +{"event": "line", "data": " \n", "pipe": "stdout"} +{"event": "line", "data": " Mixed precision type: fp16 \n", "pipe": "stdout"} +{"event": "line", "data": " ds_config: {'train_batch_size': \n", "pipe": "stdout"} +{"event": "line", "data": " 'auto', \n", "pipe": "stdout"} +{"event": "line", "data": " 'train_micro_batch_size_per_gpu': \n", "pipe": "stdout"} +{"event": "line", "data": " 'auto', \n", "pipe": "stdout"} +{"event": "line", "data": " 'gradient_accumulation_steps': 1, \n", "pipe": "stdout"} +{"event": "line", "data": " 'zero_optimization': {'stage': 2, \n", "pipe": "stdout"} +{"event": "line", "data": " 'offload_optimizer': {'device': \n", "pipe": "stdout"} +{"event": "line", "data": " 'none', 'nvme_path': None}, \n", "pipe": "stdout"} +{"event": "line", "data": " 'offload_param': {'device': 'none', \n", "pipe": "stdout"} +{"event": "line", "data": " 'nvme_path': None}, \n", "pipe": "stdout"} +{"event": "line", "data": " 'stage3_gather_16bit_weights_on_model \n", "pipe": "stdout"} +{"event": "line", "data": " _save': False}, 'steps_per_print': \n", "pipe": "stdout"} +{"event": "line", "data": " inf, 'fp16': {'enabled': True, \n", "pipe": "stdout"} +{"event": "line", "data": " 'auto_cast': True}, 'bf16': \n", "pipe": "stdout"} +{"event": "line", "data": " {'enabled': False}} \n", "pipe": "stdout"} +{"event": "line", "data": " \n", "pipe": "stdout"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by mode='default'.\n", "pipe": "stderr"} +{"event": "line", "data": " table = cls._concat_blocks(blocks, axis=0)\n", "pipe": "stderr"} +{"event": "line", "data": "loading configuration file config.json from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/config.json\n", "pipe": "stderr"} +{"event": "line", "data": "Model config OPTConfig {\n", "pipe": "stderr"} +{"event": "line", "data": " \"_name_or_path\": \"facebook/opt-6.7b\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"_remove_final_layer_norm\": false,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_function\": \"relu\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"architectures\": [\n", "pipe": "stderr"} +{"event": "line", "data": " \"OPTForCausalLM\"\n", "pipe": "stderr"} +{"event": "line", "data": " ],\n", "pipe": "stderr"} +{"event": "line", "data": " \"attention_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"bos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"do_layer_norm_before\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"dropout\": 0.1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"enable_bias\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"eos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"ffn_dim\": 16384,\n", "pipe": "stderr"} +{"event": "line", "data": " \"hidden_size\": 4096,\n", "pipe": "stderr"} +{"event": "line", "data": " \"init_std\": 0.02,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layer_norm_elementwise_affine\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layerdrop\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"max_position_embeddings\": 2048,\n", "pipe": "stderr"} +{"event": "line", "data": " \"model_type\": \"opt\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_attention_heads\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_hidden_layers\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"pad_token_id\": 1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"prefix\": \"\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"torch_dtype\": \"float16\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"transformers_version\": \"4.35.0\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"use_cache\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"vocab_size\": 50272,\n", "pipe": "stderr"} +{"event": "line", "data": " \"word_embed_proj_dim\": 4096\n", "pipe": "stderr"} +{"event": "line", "data": "}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "loading configuration file config.json from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/config.json\n", "pipe": "stderr"} +{"event": "line", "data": "Model config OPTConfig {\n", "pipe": "stderr"} +{"event": "line", "data": " \"_name_or_path\": \"facebook/opt-6.7b\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"_remove_final_layer_norm\": false,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_function\": \"relu\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"architectures\": [\n", "pipe": "stderr"} +{"event": "line", "data": " \"OPTForCausalLM\"\n", "pipe": "stderr"} +{"event": "line", "data": " ],\n", "pipe": "stderr"} +{"event": "line", "data": " \"attention_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"bos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"do_layer_norm_before\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"dropout\": 0.1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"enable_bias\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"eos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"ffn_dim\": 16384,\n", "pipe": "stderr"} +{"event": "line", "data": " \"hidden_size\": 4096,\n", "pipe": "stderr"} +{"event": "line", "data": " \"init_std\": 0.02,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layer_norm_elementwise_affine\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layerdrop\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"max_position_embeddings\": 2048,\n", "pipe": "stderr"} +{"event": "line", "data": " \"model_type\": \"opt\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_attention_heads\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_hidden_layers\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"pad_token_id\": 1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"prefix\": \"\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"torch_dtype\": \"float16\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"transformers_version\": \"4.35.0\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"use_cache\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"vocab_size\": 50272,\n", "pipe": "stderr"} +{"event": "line", "data": " \"word_embed_proj_dim\": 4096\n", "pipe": "stderr"} +{"event": "line", "data": "}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "loading file vocab.json from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/vocab.json\n", "pipe": "stderr"} +{"event": "line", "data": "loading file merges.txt from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/merges.txt\n", "pipe": "stderr"} +{"event": "line", "data": "loading file tokenizer.json from cache at None\n", "pipe": "stderr"} +{"event": "line", "data": "loading file added_tokens.json from cache at None\n", "pipe": "stderr"} +{"event": "line", "data": "loading file special_tokens_map.json from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/special_tokens_map.json\n", "pipe": "stderr"} +{"event": "line", "data": "loading file tokenizer_config.json from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/tokenizer_config.json\n", "pipe": "stderr"} +{"event": "line", "data": "loading configuration file config.json from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/config.json\n", "pipe": "stderr"} +{"event": "line", "data": "Model config OPTConfig {\n", "pipe": "stderr"} +{"event": "line", "data": " \"_name_or_path\": \"facebook/opt-6.7b\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"_remove_final_layer_norm\": false,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_function\": \"relu\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"architectures\": [\n", "pipe": "stderr"} +{"event": "line", "data": " \"OPTForCausalLM\"\n", "pipe": "stderr"} +{"event": "line", "data": " ],\n", "pipe": "stderr"} +{"event": "line", "data": " \"attention_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"bos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"do_layer_norm_before\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"dropout\": 0.1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"enable_bias\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"eos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"ffn_dim\": 16384,\n", "pipe": "stderr"} +{"event": "line", "data": " \"hidden_size\": 4096,\n", "pipe": "stderr"} +{"event": "line", "data": " \"init_std\": 0.02,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layer_norm_elementwise_affine\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layerdrop\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"max_position_embeddings\": 2048,\n", "pipe": "stderr"} +{"event": "line", "data": " \"model_type\": \"opt\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_attention_heads\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_hidden_layers\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"pad_token_id\": 1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"prefix\": \"\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"torch_dtype\": \"float16\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"transformers_version\": \"4.35.0\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"use_cache\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"vocab_size\": 50272,\n", "pipe": "stderr"} +{"event": "line", "data": " \"word_embed_proj_dim\": 4096\n", "pipe": "stderr"} +{"event": "line", "data": "}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "loading configuration file config.json from cache at /Users/satyaortiz-gagne/travail/mila/milabench/cache/huggingface/hub/models--facebook--opt-6.7b/snapshots/a45aa65bbeb77c1558bc99bedc6779195462dab0/config.json\n", "pipe": "stderr"} +{"event": "line", "data": "Model config OPTConfig {\n", "pipe": "stderr"} +{"event": "line", "data": " \"_name_or_path\": \"facebook/opt-6.7b\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"_remove_final_layer_norm\": false,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"activation_function\": \"relu\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"architectures\": [\n", "pipe": "stderr"} +{"event": "line", "data": " \"OPTForCausalLM\"\n", "pipe": "stderr"} +{"event": "line", "data": " ],\n", "pipe": "stderr"} +{"event": "line", "data": " \"attention_dropout\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"bos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"do_layer_norm_before\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"dropout\": 0.1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"enable_bias\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"eos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"ffn_dim\": 16384,\n", "pipe": "stderr"} +{"event": "line", "data": " \"hidden_size\": 4096,\n", "pipe": "stderr"} +{"event": "line", "data": " \"init_std\": 0.02,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layer_norm_elementwise_affine\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"layerdrop\": 0.0,\n", "pipe": "stderr"} +{"event": "line", "data": " \"max_position_embeddings\": 2048,\n", "pipe": "stderr"} +{"event": "line", "data": " \"model_type\": \"opt\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_attention_heads\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"num_hidden_layers\": 32,\n", "pipe": "stderr"} +{"event": "line", "data": " \"pad_token_id\": 1,\n", "pipe": "stderr"} +{"event": "line", "data": " \"prefix\": \"\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"torch_dtype\": \"float16\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"transformers_version\": \"4.35.0\",\n", "pipe": "stderr"} +{"event": "line", "data": " \"use_cache\": true,\n", "pipe": "stderr"} +{"event": "line", "data": " \"vocab_size\": 50272,\n", "pipe": "stderr"} +{"event": "line", "data": " \"word_embed_proj_dim\": 4096\n", "pipe": "stderr"} +{"event": "line", "data": "}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "[04/01/24 18:26:55] WARNING [0/1] __main__ - The tokenizer picked logging.py:60\n", "pipe": "stdout"} +{"event": "line", "data": " seems to have a very large \n", "pipe": "stdout"} +{"event": "line", "data": " `model_max_length` \n", "pipe": "stdout"} +{"event": "line", "data": " (1000000000000000019884624838656). \n", "pipe": "stdout"} +{"event": "line", "data": " Picking 1024 instead. You can change \n", "pipe": "stdout"} +{"event": "line", "data": " that default value by passing \n", "pipe": "stdout"} +{"event": "line", "data": " --block_size xxx. \n", "pipe": "stdout"} +{"event": "line", "data": "Generate config GenerationConfig {\n", "pipe": "stderr"} +{"event": "line", "data": " \"bos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"eos_token_id\": 2,\n", "pipe": "stderr"} +{"event": "line", "data": " \"pad_token_id\": 1\n", "pipe": "stderr"} +{"event": "line", "data": "}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 63}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 63}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 62}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 60}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 60}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 59}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 59}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 58}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 58}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 57}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 57}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 56}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 56}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 55}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 55}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 55}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 54}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 54}}}, "pipe": "data"} +{"event": "line", "data": "You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embedding dimension will be 50265. This might induce some performance reduction as *Tensor Cores* will not be available. For more details about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc\n", "pipe": "stderr"} +{"event": "line", "data": "[04/01/24 18:27:56] INFO [0/1] accelerate.accelerator - Since logging.py:60\n", "pipe": "stdout"} +{"event": "line", "data": " you passed both train and evaluation \n", "pipe": "stdout"} +{"event": "line", "data": " dataloader, `is_train_batch_min` \n", "pipe": "stdout"} +{"event": "line", "data": " (here True will decide the \n", "pipe": "stdout"} +{"event": "line", "data": " `train_batch_size` (1). \n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:56,713] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed info: version=0.12.2, git-hash=unknown, git-branch=unknown\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1468.375, 81920.0], "load": 0, "temperature": 53}}}, "pipe": "data"} +{"event": "line", "data": "[2024-04-01 18:27:58,465] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,466] [INFO] [logging.py:96:log_dist] [Rank 0] Using client Optimizer as basic optimizer\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,466] [INFO] [logging.py:96:log_dist] [Rank 0] Removing param_group that has no 'params' in the basic Optimizer\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,488] [INFO] [logging.py:96:log_dist] [Rank 0] DeepSpeed Basic Optimizer = AdamW\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,488] [INFO] [utils.py:56:is_zero_supported_optimizer] Checking ZeRO support for optimizer=AdamW type=\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,488] [INFO] [logging.py:96:log_dist] [Rank 0] Creating torch.float16 ZeRO stage 2 optimizer\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,488] [INFO] [stage_1_and_2.py:147:__init__] Reduce bucket size 500,000,000\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,489] [INFO] [stage_1_and_2.py:148:__init__] Allgather bucket size 500,000,000\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,489] [INFO] [stage_1_and_2.py:149:__init__] CPU Offload: False\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:27:58,489] [INFO] [stage_1_and_2.py:150:__init__] Round robin gradient partitioning: False\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1534.375, 81920.0], "load": 0.28, "temperature": 53}}}, "pipe": "data"} +{"event": "line", "data": "[2024-04-01 18:28:01,939] [INFO] [utils.py:802:see_memory_usage] Before initializing optimizer states\n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:28:01,939] [INFO] [utils.py:803:see_memory_usage] MA 37.21 GB Max_MA 37.21 GB CA 37.22 GB Max_CA 37 GB \n", "pipe": "stdout"} +{"event": "line", "data": "[2024-04-01 18:28:01,940] [INFO] [utils.py:810:see_memory_usage] CPU Virtual Memory: used = 3.8 GB, percent = 1.8%\n", "pipe": "stdout"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/main.py\", line 430, in \n", "pipe": "stderr"} +{"event": "line", "data": " main()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/main.py\", line 347, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ) = accelerator.prepare(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/accelerator.py\", line 1284, in prepare\n", "pipe": "stderr"} +{"event": "line", "data": " result = self._prepare_deepspeed(*args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/accelerator.py\", line 1666, in _prepare_deepspeed\n", "pipe": "stderr"} +{"event": "line", "data": " engine, optimizer, _, lr_scheduler = deepspeed.initialize(**kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/__init__.py\", line 171, in initialize\n", "pipe": "stderr"} +{"event": "line", "data": " engine = DeepSpeedEngine(args=args,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/runtime/engine.py\", line 304, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " self._configure_optimizer(optimizer, model_parameters)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/runtime/engine.py\", line 1219, in _configure_optimizer\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizer = self._configure_zero_optimizer(basic_optimizer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/runtime/engine.py\", line 1480, in _configure_zero_optimizer\n", "pipe": "stderr"} +{"event": "line", "data": " optimizer = DeepSpeedZeroOptimizer(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/runtime/zero/stage_1_and_2.py\", line 510, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " self.initialize_optimizer_states()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/runtime/zero/stage_1_and_2.py\", line 645, in initialize_optimizer_states\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizer.step()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py\", line 68, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " return wrapped(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/optimizer.py\", line 373, in wrapper\n", "pipe": "stderr"} +{"event": "line", "data": " out = func(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/optimizer.py\", line 76, in _use_grad\n", "pipe": "stderr"} +{"event": "line", "data": " ret = func(self, *args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/adamw.py\", line 173, in step\n", "pipe": "stderr"} +{"event": "line", "data": " self._init_group(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/adamw.py\", line 121, in _init_group\n", "pipe": "stderr"} +{"event": "line", "data": " state[\"exp_avg\"] = torch.zeros_like(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 24.80 GiB. GPU 0 has a total capacty of 79.14 GiB of which 16.41 GiB is free. Including non-PyTorch memory, this process has 62.72 GiB memory in use. Of the allocated memory 62.01 GiB is allocated by PyTorch, and 11.34 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF\n", "pipe": "stderr"} +{"event": "line", "data": "[2024-04-01 18:28:05,983] torch.distributed.elastic.multiprocessing.api: [ERROR] failed (exitcode: 1) local_rank: 0 (pid: 20648) of binary: /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/python\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/accelerate\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/commands/accelerate_cli.py\", line 47, in main\n", "pipe": "stderr"} +{"event": "line", "data": " args.func(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/commands/launch.py\", line 979, in launch_command\n", "pipe": "stderr"} +{"event": "line", "data": " deepspeed_launcher(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/accelerate/commands/launch.py\", line 695, in deepspeed_launcher\n", "pipe": "stderr"} +{"event": "line", "data": " distrib_run.run(args)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/run.py\", line 797, in run\n", "pipe": "stderr"} +{"event": "line", "data": " elastic_launch(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 134, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " return launch_agent(self._config, self._entrypoint, list(args))\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/distributed/launcher/api.py\", line 264, in launch_agent\n", "pipe": "stderr"} +{"event": "line", "data": " raise ChildFailedError(\n", "pipe": "stderr"} +{"event": "line", "data": "torch.distributed.elastic.multiprocessing.errors.ChildFailedError: \n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/main.py FAILED\n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Failures:\n", "pipe": "stderr"} +{"event": "line", "data": " \n", "pipe": "stderr"} +{"event": "line", "data": "------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "Root Cause (first observed failure):\n", "pipe": "stderr"} +{"event": "line", "data": "[0]:\n", "pipe": "stderr"} +{"event": "line", "data": " time : 2024-04-01_18:28:05\n", "pipe": "stderr"} +{"event": "line", "data": " host : vm.internal.cloudapp.net\n", "pipe": "stderr"} +{"event": "line", "data": " rank : 0 (local_rank: 0)\n", "pipe": "stderr"} +{"event": "line", "data": " exitcode : 1 (pid: 20648)\n", "pipe": "stderr"} +{"event": "line", "data": " error_file: \n", "pipe": "stderr"} +{"event": "line", "data": " traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html\n", "pipe": "stderr"} +{"event": "line", "data": "============================================================\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "accelerate", "launch", "--mixed_precision=fp16", "--dynamo_backend=no", "--machine_rank=0", "--num_machines=1", "--use_deepspeed", "--deepspeed_multinode_launcher=standard", "--zero_stage=2", "--gradient_accumulation_steps=1", "--num_cpu_threads_per_process=8", "--main_process_ip=127.0.0.1", "--main_process_port=8123", "--num_processes=1", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/accelerate_opt/main.py", "--cpus_per_gpu", "8", "--dataset_config_name", "wikitext-103-v1", "--dataset_name", "wikitext", "--dataset_rev", "b08601e", "--max_train_steps", "100", "--model_name", "facebook/opt-6.7b", "--per_gpu_batch_size", "1", "--validation_split_percentage", "5", "--cache", "/Users/satyaortiz-gagne/travail/mila/milabench/cache"], "time": 1711996086.3043292, "return_code": 1}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/reformer.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/reformer.D0.data new file mode 100644 index 000000000..79165e71a --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/reformer.D0.data @@ -0,0 +1,291 @@ +{"event": "config", "data": {"argv": {"--batch-size": 64, "--model": "Reformer", "--num-workers": 8, "--precision": "tf32-fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "hf", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "reformer", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["reformer", "D0"], "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 52, "power": 70.054, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996088.985997, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "-m", "bench", "--batch-size", "64", "--model", "Reformer", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711996089.0032966}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "line", "data": "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 5.877231121063232}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.84849214553833}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 57, "power": 287.901}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.813655853271484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.790578365325928}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.775251388549805}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 58, "power": 323.309}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.763707160949707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.755547046661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.036738194926734, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.750585556030273}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 59, "power": 328.691}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 57.03889911480985, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.745539665222168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.930552995397974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7407636642456055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.862143665891345, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.735927581787109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 60, "power": 282.726}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.914573348140166, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7315802574157715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.93278598995013, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.729065418243408}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.9747914278631, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.725703239440918}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 61, "power": 293.629}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.97827590164984, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.722489833831787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.935323972673224, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.720373153686523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.901966579353655, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.716850280761719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 62, "power": 329.845}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.78839448022378, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.713784217834473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.78807712954151, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.711319446563721}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 63, "power": 313.543}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.76205460764728, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.708925724029541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.77761395090133, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.70553731918335}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.74023456833203, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.702667236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 64, "power": 245.723}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.67771201623243, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7002410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.66833452200478, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.697482585906982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.634732246675526, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.695372104644775}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 65, "power": 308.513}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.6276615599284, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.692790508270264}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.65934917369204, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.689642429351807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.60171503125793, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.687574863433838}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 66, "power": 297.049}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.56575847686966, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.685352325439453}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.563971009313896, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.68161153793335}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.55556666695705, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.678653717041016}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 67, "power": 306.247}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.52830358337483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.675431728363037}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.42632697632247, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.672421932220459}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.39555655145017, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.668979644775391}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 67, "power": 254.721}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.538616729489405, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.665622234344482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.461085552290236, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.660311698913574}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 68, "power": 299.456}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.433508648231076, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.656097412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.43991682569393, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6530022621154785}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.46265840366127, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.648213863372803}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 69, "power": 295.387}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.430244726403174, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.642153739929199}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.39455666274892, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.636126518249512}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.36817119742151, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.63180685043335}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 69, "power": 273.73}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.37865857093091, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.624069690704346}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.35108083534634, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.61751651763916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.37695918885226, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.6096882820129395}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 70, "power": 285.242}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.35790564202269, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.601232528686523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.31005619816636, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.592065334320068}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.28553967364757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.583576679229736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 70, "power": 249.629}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.26800882349548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5719146728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.24821116406726, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.559656620025635}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 71, "power": 322.843}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.30796511842975, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5499796867370605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.23277111363598, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.536138534545898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.23315773542822, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.524857997894287}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 72, "power": 313.557}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.2764312866326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.514582633972168}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.23517013212198, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.498445987701416}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.16378886381604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.485159873962402}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 72, "power": 280.969}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.231521608122826, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.4730682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.24540822154459, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.461663722991943}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.23168726693003, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.450464248657227}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 288.96}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.23013881758413, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.447353363037109}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.19364557145769, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.435917854309082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.2102762636359, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.423335552215576}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 0.98, "temperature": 73, "power": 285.544}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.24424999213635, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.414547443389893}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.222300304846094, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.405716419219971}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.11573546371018, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.392375946044922}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 340.541}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.04925883507795, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.381011009216309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.1111043914167, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.367169380187988}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 253.899}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 56.16563659752109, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [25420.375, 81920.0], "load": 1.0, "temperature": 73, "power": 295.391}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-reformer.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "-m", "bench", "--batch-size", "64", "--model", "Reformer", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711996163.9008703, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/regnet_y_128gf.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/regnet_y_128gf.D0.data new file mode 100644 index 000000000..01f5e3d60 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/regnet_y_128gf.D0.data @@ -0,0 +1,406 @@ +{"event": "config", "data": {"argv": {"--batch-size": 64, "--epochs": 50, "--lr": 0.01, "--model": "regnet_y_128gf", "--no-stdout": true, "--precision": "tf32-fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "torchvision", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "regnet_y_128gf", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["regnet_y_128gf", "D0"], "tags": ["classification", "convnet", "lstm", "resnet", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 2.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 70, "power": 89.153, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996166.571403, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "64", "--epochs", "50", "--lr", "0.01", "--model", "regnet_y_128gf", "--no-stdout", "--precision", "tf32-fp16"], "time": 1711996166.588517}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 65, "power": 52.877}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1728.375, 81920.0], "load": 0.01, "temperature": 64, "power": 80.899}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00616455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94366455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92230224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 68, "power": 297.285}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02435302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96478271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 70, "power": 274.595}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.29701079544424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05963134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.4857604516596, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03607177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 71, "power": 303.875}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.54223875199982, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4664306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.31069310389502, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.239990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 72, "power": 265.517}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.38839461055385, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.17510986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5086669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.52060932859041, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.4044189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 72, "power": 316.13}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9764404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.12439785690505, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06768798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.64888810410258, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 86.12167680923224, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 73, "power": 311.304}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04644775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.41251207844948, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14105224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.45257568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.17001261948887, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.43798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 74, "power": 299.502}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08331298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.00251504910214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04290771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.98795891715989, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.25390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 74, "power": 306.286}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.21672627444839, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.16436767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.26544189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.1070304942647, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1256103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.24031529849557, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.99, "temperature": 74, "power": 302.884}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.007080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.047119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.22775503126724, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 62.56706749710997, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.35552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92474365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 299.271}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.37342037250995, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22601318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.30999858809214, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05743408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06365966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.3383223005363, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.133}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.32281494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.17872058958835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.38861083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 82.34985655369115, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0684814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 75, "power": 305.215}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 85.1470010987735, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0540771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.47707719004714, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.20361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15069580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.24213042484408, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 76, "power": 323.265}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.054931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.25929957284653, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.27968623702648, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 294.804}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86383056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.16742955845102, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1290283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7943115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 94.34790568947672, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00726318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 60.740870020371176, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 76, "power": 312.293}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90667724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.39354727809722, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90716552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01605224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.0875678255441, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 76, "power": 291.297}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.24219322157548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.10979898808668, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84466552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 75, "power": 303.247}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.6783196486179, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89312744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9566650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 83.53307745411436, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14141845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 90.60161181768012, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 76, "power": 294.097}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1663818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 66.56072494214207, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2611083984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05230712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.13863380252162, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 307.647}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1234130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.15228346322489, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02764892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.99728755706066, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92816162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.92825232165981, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.176}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04681396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.90521739715254, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96356201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 92.49896335400061, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.5179443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 318.414}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.16382231064765, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.15191650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.08241705812299, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.97286187645823, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 77, "power": 322.327}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10284423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02862548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.96649706792081, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8790283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13043212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 84.96567011219895, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05841064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.95, "temperature": 77, "power": 293.93}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.5086584992234, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0330810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97674560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.53878470473148, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.22381591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.02471488881783, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 301.723}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04339599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.62652862632022, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.39404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.82452237217629, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 314.787}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 91.40460344403434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09002685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.18446349526336, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0643310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 74.99607552274718, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 293.837}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.2716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 76.07790226938528, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99871826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9783935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.8309225172371, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 0.96, "temperature": 78, "power": 320.725}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98858642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 75.70348199939757, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31570.375, 81920.0], "load": 1.0, "temperature": 78, "power": 313.039}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-regnet_y_128gf.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "64", "--epochs", "50", "--lr", "0.01", "--model", "regnet_y_128gf", "--no-stdout", "--precision", "tf32-fp16"], "time": 1711996258.7413826, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/remote.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/remote.data new file mode 100644 index 000000000..da3f4988b --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/remote.data @@ -0,0 +1,283 @@ +{"event": "config", "data": {"name": "remote", "tag": ["remote"], "definition": ".", "run_name": "jofevozu.2024-04-01_14:01:06.047754", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/llm", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache"}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml.86f95fc6853bd4fbb206b790834bc0a3", "plan": {"method": "per_gpu"}, "system": {"arch": "cpu", "sshkey": null, "nodes": [{"aliaslist": [], "hostname": "electrictortoise.eastus2.cloudapp.azure.com", "ip": "electrictortoise.eastus2.cloudapp.azure.com", "ipaddrlist": ["40.79.18.211"], "key": "/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.electrictortoise.pem", "local": false, "main": true, "name": "manager", "user": "ubuntu"}], "cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "self": null}, "hash": "86f95fc6853bd4fbb206b790834bc0a3", "install_variant": "cpu"}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 8, "brand": "Apple M1 Pro"}, "os": {"sysname": "Darwin", "nodename": "MBP-de-Mila", "release": "22.5.0", "version": "Darwin Kernel Version 22.5.0: Thu Jun 8 22:22:20 PDT 2023; root:xnu-8796.121.3~7/RELEASE_ARM64_T6000", "machine": "x86_64"}, "accelerators": {"arch": "cpu", "gpus": {}}, "date": 1712011407.274264, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {}}, "pipe": null} +{"event": "start", "data": {"command": ["rsync", "--force", "-av", "-e", "ssh -i/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.electrictortoise.pem -oCheckHostIP=no -oStrictHostKeyChecking=no", "--include=*/.git/*", "--exclude=*/.*/*", "--exclude=*/venv/*", "--exclude=*/env/*", "--exclude=*/tmp/*", "ubuntu@electrictortoise.eastus2.cloudapp.azure.com:/Users/satyaortiz-gagne/travail/mila/milabench/runs", "/Users/satyaortiz-gagne/travail/mila/milabench"], "time": 1711997007.3936079}, "pipe": null} +{"event": "line", "data": "receiving file list ... ", "pipe": "stdout"} +{"event": "line", "data": "done\n", "pipe": "stdout"} +{"event": "line", "data": "runs/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/bert-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/bert-fp16.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/bert-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/bert-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/bert-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/bf16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/bf16.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/convnext_large-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/convnext_large-fp16.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/convnext_large-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/convnext_large-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/convnext_large-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/davit_large-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/davit_large.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/davit_large.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/dlrm.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/dlrm.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/focalnet.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/llama.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/llama.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/opt-1_3b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/opt-1_3b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/opt-1_3b.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/opt-6_7b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/opt-6_7b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/reformer.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/regnet_y_128gf.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/resnet152-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/resnet152.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/resnet50.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/rwkv.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/rwkv.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/stargan.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/stargan.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/super-slomo.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/super-slomo.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/t5.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:13:57.440963/whisper.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/bert-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/bert-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/bert-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/bert-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/bf16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/convnext_large-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/convnext_large-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/convnext_large-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/convnext_large-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/davit_large-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/davit_large.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/dlrm.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/focalnet.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/llama.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/opt-1_3b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/opt-1_3b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/opt-6_7b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/opt-6_7b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/reformer.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/regnet_y_128gf.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/resnet152-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/resnet152.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/resnet50.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/rwkv.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/stargan.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/super-slomo.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/t5.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:21:44.664386/whisper.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/bert-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/bert-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/bert-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/bert-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/bf16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/convnext_large-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/convnext_large-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/convnext_large-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/convnext_large-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/davit_large-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/davit_large.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/dlrm.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/focalnet.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/llama.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/llama.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/opt-1_3b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/opt-1_3b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/opt-6_7b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/opt-6_7b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/reformer.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/regnet_y_128gf.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/resnet152-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/resnet152.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/resnet50.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/rwkv.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/stargan.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/super-slomo.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/t5.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:22:55.198137/whisper.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/bert-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/bert-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/bert-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/bert-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/bf16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/convnext_large-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/convnext_large-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/convnext_large-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/convnext_large-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/davit_large-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/davit_large.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/dlrm.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/focalnet.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/llama.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/opt-1_3b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/opt-1_3b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/opt-6_7b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/opt-6_7b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/reformer.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/regnet_y_128gf.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/resnet152-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/resnet152.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/resnet50.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/rwkv.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/stargan.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/super-slomo.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/t5.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_13:30:06.785842/whisper.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_16:26:55.224878/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_16:26:55.224878/remote.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_16:26:55.224878/remote.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_16:28:09.456766/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_16:28:09.456766/remote.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/install.2024-04-01_16:28:09.456766/remote.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/bert-fp16.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/bert-fp32.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/bert-tf32-fp16.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/bert-tf32.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/bf16.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp16.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/convnext_large-fp32.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32-fp16.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/convnext_large-tf32.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/davit_large-multi.0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/davit_large.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/dlrm.0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/dlrm.0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/focalnet.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/fp16.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/fp32.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/llama.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/llama.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b.local.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/opt-1_3b.local.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b.local.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/opt-6_7b.local.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/reformer.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/reformer.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/regnet_y_128gf.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/resnet50.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/rwkv.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/rwkv.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/stargan.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/stargan.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/super-slomo.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/super-slomo.D0.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/t5.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/tf32.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/jofevozu.2024-04-01_14:01:06.047754/whisper.D0.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/bert-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/bert-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/bert-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/bert-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/bf16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/convnext_large-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/convnext_large-fp16.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/convnext_large-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/convnext_large-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/convnext_large-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/davit_large-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/davit_large.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/dlrm.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/focalnet.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/llama.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/llama.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-1_3b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-1_3b-multinode.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-1_3b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-1_3b.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-6_7b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-6_7b-multinode.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-6_7b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/opt-6_7b.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/reformer.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/regnet_y_128gf.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/resnet152-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/resnet152.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/resnet50.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/rwkv.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/rwkv.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/stargan.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/super-slomo.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/super-slomo.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/t5.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:30:30.902769/whisper.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/bert-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/bert-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/bert-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/bert-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/bf16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/convnext_large-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/convnext_large-fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/convnext_large-tf32-fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/convnext_large-tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/davit_large-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/davit_large.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/dlrm.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/focalnet.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/fp16.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/fp32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/llama.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/llama.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-1_3b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-1_3b-multinode.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-1_3b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-1_3b.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-6_7b-multinode.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-6_7b-multinode.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-6_7b.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/opt-6_7b.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/reformer.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/regnet_y_128gf.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/resnet152-multi.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/resnet152.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/resnet50.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/rwkv.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/stargan.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/super-slomo.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/super-slomo.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/t5.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/t5.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/tf32.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/whisper.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_13:58:13.259400/whisper.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_16:27:35.402196/\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_16:27:35.402196/remote.data\n", "pipe": "stdout"} +{"event": "line", "data": "runs/prepare.2024-04-01_16:27:35.402196/remote.stderr\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "sent 5934 bytes received 2868976 bytes 1916606.67 bytes/sec\n", "pipe": "stdout"} +{"event": "line", "data": "total size is 2849012 speedup is 0.99\n", "pipe": "stdout"} +{"event": "end", "data": {"command": ["rsync", "--force", "-av", "-e", "ssh -i/Users/satyaortiz-gagne/.ssh/covalent-azure-task-0-azure__a100-a6186297d965e74b5580bfc38354b9fb/id_rsa.covalent.electrictortoise.pem -oCheckHostIP=no -oStrictHostKeyChecking=no", "--include=*/.git/*", "--exclude=*/.*/*", "--exclude=*/venv/*", "--exclude=*/env/*", "--exclude=*/tmp/*", "ubuntu@electrictortoise.eastus2.cloudapp.azure.com:/Users/satyaortiz-gagne/travail/mila/milabench/runs", "/Users/satyaortiz-gagne/travail/mila/milabench"], "time": 1711997008.896395, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0.data new file mode 100644 index 000000000..2b9703720 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0.data @@ -0,0 +1,300 @@ +{"event": "config", "data": {"argv": {"--amp": true, "--batch-size": 256, "--model": "resnet152"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "timm", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "resnet152-multi", "plan": {"method": "njobs", "n": 1}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["resnet152-multi", "0"], "tags": ["classification", "convnet", "multigpu", "resnet", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 5.0, "job-number": 0, "devices": ["0"]}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.14, "memory": 0.010771942138671876}, "temperature": 69, "power": 87.739, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996396.695941, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "256", "--model", "resnet152", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1711996396.7124786}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [24052.375, 81920.0], "load": 1.0, "temperature": 68, "power": 311.154}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.475s, 73.66/s (3.475s, 73.66/s) LR: 1.000e-05 Data: 1.273 (1.273)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27188.375, 81920.0], "load": 1.0, "temperature": 69, "power": 140.823}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938846111297607}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27188.375, 81920.0], "load": 0.94, "temperature": 69, "power": 159.968}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.360s, 710.64/s (0.584s, 438.02/s) LR: 1.000e-05 Data: 0.000 (0.096)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.164 (1.164) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.242 (0.254) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0/20240401-183321-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 710.0031465989712, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27672.375, 81920.0], "load": 0.95, "temperature": 65, "power": 79.952}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4978.375, 81920.0], "load": 1.0, "temperature": 65, "power": 82.643}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963787078857422}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.461s, 175.20/s (1.461s, 175.20/s) LR: 2.001e-02 Data: 1.097 (1.097)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.906603813171387}, "pipe": "data"} +{"event": "data", "data": {"rate": 684.8408293256582, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27250.375, 81920.0], "load": 0.94, "temperature": 68, "power": 221.632}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 634.7386206300895, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.927865982055664}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.0998935982952, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.903002738952637}, "pipe": "data"} +{"event": "data", "data": {"rate": 629.4321912364355, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896069049835205}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27250.375, 81920.0], "load": 0.99, "temperature": 68, "power": 304.42}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 627.7821924298918, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97745418548584}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.977 (6.92) Time: 0.365s, 700.52/s (0.444s, 576.42/s) LR: 2.001e-02 Data: 0.001 (0.080)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.986 (0.986) Loss: 6.8469 (6.8469) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.235) Loss: 6.5627 (6.8341) Acc@1: 18.7500 ( 0.2422) Acc@5: 28.1250 ( 1.2112)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0/20240401-183321-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.4978372457634, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27734.375, 81920.0], "load": 0, "temperature": 65, "power": 79.756}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27734.375, 81920.0], "load": 0.03, "temperature": 64, "power": 79.607}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83317756652832}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.833 (6.83) Time: 1.455s, 175.92/s (1.455s, 175.92/s) LR: 4.001e-02 Data: 1.092 (1.092)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.6378348423361, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8565263748168945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28218.375, 81920.0], "load": 0.94, "temperature": 68, "power": 303.373}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 685.5882452124622, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906831741333008}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.3363525507433, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944545745849609}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.4771828953335, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97769832611084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28218.375, 81920.0], "load": 0.94, "temperature": 69, "power": 310.027}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 628.1835676581823, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945409774780273}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.945 (6.92) Time: 0.366s, 699.85/s (0.444s, 576.20/s) LR: 4.001e-02 Data: 0.000 (0.079)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.7966 (6.7966) Acc@1: 2.7344 ( 2.7344) Acc@5: 3.1250 ( 3.1250)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.234) Loss: 6.3497 (6.8031) Acc@1: 0.0000 ( 0.1696) Acc@5: 28.1250 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.7379743687729, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28702.375, 81920.0], "load": 0, "temperature": 67, "power": 80.061}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28702.375, 81920.0], "load": 0.03, "temperature": 64, "power": 78.9}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.854823112487793}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.855 (6.85) Time: 1.454s, 176.12/s (1.454s, 176.12/s) LR: 6.000e-02 Data: 1.089 (1.089)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 687.8481663733182, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.913714408874512}, "pipe": "data"} +{"event": "data", "data": {"rate": 689.7738431225596, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29186.375, 81920.0], "load": 0.97, "temperature": 68, "power": 277.237}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96059513092041}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.1344713706548, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989835739135742}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.9653469133895, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.055400371551514}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.4414846568532, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29186.375, 81920.0], "load": 0.98, "temperature": 68, "power": 272.317}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0439605712890625}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.044 (6.96) Time: 0.365s, 700.60/s (0.445s, 575.70/s) LR: 6.000e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 630.3719100944966, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.034 (1.034) Loss: 6.9061 (6.9061) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.238) Loss: 6.3740 (6.8208) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0/20240401-183321-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.4892443994634, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29670.375, 81920.0], "load": 0.95, "temperature": 68, "power": 321.094}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29670.375, 81920.0], "load": 0, "temperature": 63, "power": 78.527}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8941755294799805}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.894 (6.89) Time: 1.420s, 180.30/s (1.420s, 180.30/s) LR: 8.000e-02 Data: 1.055 (1.055)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.94313907623291}, "pipe": "data"} +{"event": "data", "data": {"rate": 640.410578429016, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30154.375, 81920.0], "load": 0.94, "temperature": 68, "power": 302.654}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.969323635101318}, "pipe": "data"} +{"event": "data", "data": {"rate": 693.2117001458597, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 699.8495424633833, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.045717716217041}, "pipe": "data"} +{"event": "data", "data": {"rate": 605.6431746752029, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.071439266204834}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30154.375, 81920.0], "load": 0.94, "temperature": 69, "power": 317.45}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 625.0743227002497, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.041465759277344}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.041 (7.00) Time: 0.364s, 702.43/s (0.443s, 578.32/s) LR: 8.000e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.971 (0.971) Loss: 6.8080 (6.8080) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.234) Loss: 6.4987 (6.8416) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.4812620818502, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30638.375, 81920.0], "load": 0.9, "temperature": 69, "power": 185.385}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30638.375, 81920.0], "load": 0, "temperature": 64, "power": 79.509}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8761749267578125}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.876 (6.88) Time: 1.458s, 175.55/s (1.458s, 175.55/s) LR: 9.993e-02 Data: 1.093 (1.093)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 574.2290899044388, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.980376720428467}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.262301965703, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.090250015258789}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31122.375, 81920.0], "load": 0.94, "temperature": 68, "power": 320.102}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.0422646334645, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.106000900268555}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.848405461855, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.111999988555908}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31122.375, 81920.0], "load": 0.93, "temperature": 70, "power": 319.512}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.8211261388556, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.082710266113281}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.083 (7.04) Time: 0.366s, 699.00/s (0.446s, 574.56/s) LR: 9.993e-02 Data: 0.001 (0.080)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.032 (1.032) Loss: 6.9186 (6.9186) Acc@1: 0.3906 ( 0.3906) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.240) Loss: 6.1925 (6.8703) Acc@1: 3.1250 ( 0.2422) Acc@5: 3.1250 ( 1.1628)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.6818883104, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31606.375, 81920.0], "load": 0.68, "temperature": 67, "power": 79.412}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31606.375, 81920.0], "load": 0, "temperature": 65, "power": 80.094}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.899833679199219}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.900 (6.90) Time: 1.452s, 176.35/s (1.452s, 176.35/s) LR: 9.990e-02 Data: 1.088 (1.088)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 643.4960974054311, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.023956298828125}, "pipe": "data"} +{"event": "data", "data": {"rate": 655.769884507076, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.008710861206055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32090.375, 81920.0], "load": 0.99, "temperature": 70, "power": 323.347}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.9490291951896, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.024489402770996}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.2581112989884, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.100464820861816}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.5387399200993, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32090.375, 81920.0], "load": 0.93, "temperature": 71, "power": 327.231}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141305923461914}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.141 (7.04) Time: 0.365s, 701.89/s (0.445s, 575.19/s) LR: 9.990e-02 Data: 0.000 (0.079)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.991 (0.991) Loss: 6.8153 (6.8153) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.232) Loss: 6.5583 (6.8759) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1143)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.4614374540731, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32574.375, 81920.0], "load": 0.75, "temperature": 66, "power": 80.669}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32574.375, 81920.0], "load": 0, "temperature": 65, "power": 80.572}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.866184234619141}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.866 (6.87) Time: 1.443s, 177.44/s (1.443s, 177.44/s) LR: 9.987e-02 Data: 1.077 (1.077)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 629.4979039146006, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9307708740234375}, "pipe": "data"} +{"event": "data", "data": {"rate": 697.6716958597333, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998529434204102}, "pipe": "data"} +{"event": "data", "data": {"rate": 597.3014657073339, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33058.375, 81920.0], "load": 0.93, "temperature": 70, "power": 249.279}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.059999465942383}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.5418925920126, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1116180419921875}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.7790214103595, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33058.375, 81920.0], "load": 0.93, "temperature": 71, "power": 258.778}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.072312355041504}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.072 (7.01) Time: 0.368s, 695.88/s (0.445s, 575.02/s) LR: 9.987e-02 Data: 0.000 (0.079)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 629.5553050031095, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.028 (1.028) Loss: 6.8157 (6.8157) Acc@1: 0.0000 ( 0.0000) Acc@5: 2.7344 ( 2.7344)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.241) Loss: 6.5187 (6.8445) Acc@1: 0.0000 ( 0.1453) Acc@5: 0.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 695.8349421458502, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33542.375, 81920.0], "load": 0, "temperature": 66, "power": 81.744}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33542.375, 81920.0], "load": 0, "temperature": 65, "power": 80.766}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.889354705810547}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.889 (6.89) Time: 1.421s, 180.14/s (1.421s, 180.14/s) LR: 9.982e-02 Data: 1.056 (1.056)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.939812660217285}, "pipe": "data"} +{"event": "data", "data": {"rate": 631.9501583214117, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956624984741211}, "pipe": "data"} +{"event": "data", "data": {"rate": 635.8687680325615, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34026.375, 81920.0], "load": 0.99, "temperature": 69, "power": 304.758}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.997289657592773}, "pipe": "data"} +{"event": "data", "data": {"rate": 695.9541001089611, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963284015655518}, "pipe": "data"} +{"event": "data", "data": {"rate": 659.2181929224033, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 665.2711210259262, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.044023513793945}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34026.375, 81920.0], "load": 0.99, "temperature": 71, "power": 217.926}}}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.044 (6.96) Time: 0.367s, 696.85/s (0.443s, 577.74/s) LR: 9.982e-02 Data: 0.000 (0.077)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.997 (0.997) Loss: 6.7763 (6.7763) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.235) Loss: 6.6820 (6.8307) Acc@1: 0.0000 ( 0.1938) Acc@5: 0.0000 ( 1.0901)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 697.7570395542522, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34510.375, 81920.0], "load": 0.2, "temperature": 70, "power": 81.841}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34510.375, 81920.0], "load": 0, "temperature": 66, "power": 81.072}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8923869132995605}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.892 (6.89) Time: 1.435s, 178.39/s (1.435s, 178.39/s) LR: 9.978e-02 Data: 1.070 (1.070)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.329630440468, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.862155914306641}, "pipe": "data"} +{"event": "data", "data": {"rate": 698.2240391547167, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.924525260925293}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34994.375, 81920.0], "load": 0.92, "temperature": 71, "power": 316.376}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 597.1611400275962, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.917932033538818}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.1525105374752, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.037782192230225}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.8835796768952, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994927406311035}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 6.995 (6.93) Time: 0.366s, 700.41/s (0.444s, 576.35/s) LR: 9.978e-02 Data: 0.000 (0.078)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34994.375, 81920.0], "load": 0.99, "temperature": 72, "power": 306.453}}}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.021 (1.021) Loss: 6.7878 (6.7878) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.020 (0.241) Loss: 6.4092 (6.8103) Acc@1: 0.0000 ( 0.2422) Acc@5: 3.1250 ( 1.2112)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.4404667889587, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35478.375, 81920.0], "load": 0.81, "temperature": 70, "power": 222.944}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35478.375, 81920.0], "load": 0, "temperature": 66, "power": 80.952}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837471961975098}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.837 (6.84) Time: 1.410s, 181.50/s (1.410s, 181.50/s) LR: 9.973e-02 Data: 1.045 (1.045)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.8888044357299805}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.3570079576496, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881709098815918}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.3177815054813, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35962.375, 81920.0], "load": 0.99, "temperature": 71, "power": 309.081}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894359111785889}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.6475047224624, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930902481079102}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.2635377980774, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152-multi.0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "256", "--model", "resnet152", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152-multi.0", "--checkpoint-hist", "1"], "time": 1711996529.271445, "return_code": -15}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0.data new file mode 100644 index 000000000..ebf64a076 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0.data @@ -0,0 +1,302 @@ +{"event": "config", "data": {"argv": {"--amp": true, "--batch-size": 256, "--model": "resnet152"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "timm", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "resnet152", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["resnet152", "D0"], "tags": ["classification", "convnet", "resnet", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 92.892, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996261.351693, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "256", "--model", "resnet152", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0", "--checkpoint-hist", "1"], "time": 1711996261.3680012}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Training with a single process on 1 device (cuda:0).\n", "pipe": "stderr"} +{"event": "line", "data": "Model resnet152 created, param count:60192808\n", "pipe": "stderr"} +{"event": "line", "data": "Data processing configuration for current model + dataset:\n", "pipe": "stderr"} +{"event": "line", "data": "\tinput_size: (3, 224, 224)\n", "pipe": "stderr"} +{"event": "line", "data": "\tinterpolation: bicubic\n", "pipe": "stderr"} +{"event": "line", "data": "\tmean: (0.485, 0.456, 0.406)\n", "pipe": "stderr"} +{"event": "line", "data": "\tstd: (0.229, 0.224, 0.225)\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_pct: 0.95\n", "pipe": "stderr"} +{"event": "line", "data": "\tcrop_mode: center\n", "pipe": "stderr"} +{"event": "line", "data": "Learning rate (0.1) calculated from base learning rate (0.1) and global batch size (256) with linear scaling.\n", "pipe": "stderr"} +{"event": "line", "data": "Using native Torch AMP. Training in mixed precision.\n", "pipe": "stderr"} +{"event": "line", "data": "Scheduled epochs: 300. LR stepped per epoch.\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928679466247559}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [23866.375, 81920.0], "load": 1.0, "temperature": 73, "power": 309.77}}}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 0/16 ( 0%)] Loss: 6.929 (6.93) Time: 3.452s, 74.15/s (3.452s, 74.15/s) LR: 1.000e-05 Data: 1.255 (1.255)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.93458366394043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958017349243164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27174.375, 81920.0], "load": 1.0, "temperature": 73, "power": 300.832}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.951747417449951}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938842296600342}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27174.375, 81920.0], "load": 0.94, "temperature": 74, "power": 282.744}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954232215881348}, "pipe": "data"} +{"event": "line", "data": "Train: 0 [ 15/16 (100%)] Loss: 6.934 (6.94) Time: 0.361s, 708.84/s (0.584s, 438.34/s) LR: 1.000e-05 Data: 0.000 (0.095)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.155 (1.155) Loss: 6.9339 (6.9339) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.240 (0.253) Loss: 6.8979 (6.9128) Acc@1: 0.0000 ( 0.1453) Acc@5: 28.1250 ( 0.5329)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0/20240401-183105-resnet152-224/checkpoint-0.pth.tar', 0.14534883720930233)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 708.7657010602412, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27658.375, 81920.0], "load": 0.16, "temperature": 70, "power": 85.095}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4978.375, 81920.0], "load": 1.0, "temperature": 69, "power": 85.907}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.963784694671631}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 0/16 ( 0%)] Loss: 6.964 (6.96) Time: 1.431s, 178.84/s (1.431s, 178.84/s) LR: 2.001e-02 Data: 1.066 (1.066)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.906681537628174}, "pipe": "data"} +{"event": "data", "data": {"rate": 690.1329953138604, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27236.375, 81920.0], "load": 0.92, "temperature": 73, "power": 317.938}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.5620850252081, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.927966594696045}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.1228747370714, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902280807495117}, "pipe": "data"} +{"event": "data", "data": {"rate": 626.034935400262, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.895678520202637}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27236.375, 81920.0], "load": 0.93, "temperature": 74, "power": 325.564}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 626.3782370401499, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978145122528076}, "pipe": "data"} +{"event": "line", "data": "Train: 1 [ 15/16 (100%)] Loss: 6.978 (6.92) Time: 0.365s, 701.56/s (0.443s, 577.88/s) LR: 2.001e-02 Data: 0.000 (0.078)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.010 (1.010) Loss: 6.8472 (6.8472) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.236) Loss: 6.5629 (6.8343) Acc@1: 15.6250 ( 0.2422) Acc@5: 28.1250 ( 1.1870)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0/20240401-183105-resnet152-224/checkpoint-1.pth.tar', 0.24224806201550386)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 701.1441234200994, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27720.375, 81920.0], "load": 0, "temperature": 69, "power": 85.193}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [27720.375, 81920.0], "load": 0.03, "temperature": 69, "power": 85.02}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833061218261719}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 0/16 ( 0%)] Loss: 6.833 (6.83) Time: 1.401s, 182.76/s (1.401s, 182.76/s) LR: 4.001e-02 Data: 1.036 (1.036)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 599.0981039070392, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.860068321228027}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28204.375, 81920.0], "load": 0.99, "temperature": 73, "power": 267.35}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 637.6679457640795, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900865077972412}, "pipe": "data"} +{"event": "data", "data": {"rate": 698.139625161563, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946653842926025}, "pipe": "data"} +{"event": "data", "data": {"rate": 665.888891101004, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97684383392334}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28204.375, 81920.0], "load": 0.99, "temperature": 74, "power": 278.234}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 655.2430727647957, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.946779251098633}, "pipe": "data"} +{"event": "line", "data": "Train: 2 [ 15/16 (100%)] Loss: 6.947 (6.92) Time: 0.366s, 700.16/s (0.442s, 579.49/s) LR: 4.001e-02 Data: 0.000 (0.076)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.979 (0.979) Loss: 6.8070 (6.8070) Acc@1: 0.0000 ( 0.0000) Acc@5: 3.5156 ( 3.5156)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.234) Loss: 6.3842 (6.8019) Acc@1: 0.0000 ( 0.2422) Acc@5: 9.3750 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 700.6724373452547, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28688.375, 81920.0], "load": 0.55, "temperature": 72, "power": 275.563}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [28688.375, 81920.0], "load": 0, "temperature": 69, "power": 84.705}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.852975845336914}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 0/16 ( 0%)] Loss: 6.853 (6.85) Time: 1.424s, 179.82/s (1.424s, 179.82/s) LR: 6.000e-02 Data: 1.060 (1.060)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 548.8660222156267, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918535232543945}, "pipe": "data"} +{"event": "data", "data": {"rate": 687.9762718417126, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29172.375, 81920.0], "load": 0.99, "temperature": 73, "power": 329.164}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964803695678711}, "pipe": "data"} +{"event": "data", "data": {"rate": 698.7358699912825, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993415832519531}, "pipe": "data"} +{"event": "data", "data": {"rate": 602.6205841199439, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.048032760620117}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.2952713347266, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29172.375, 81920.0], "load": 0.99, "temperature": 74, "power": 318.118}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.039599418640137}, "pipe": "data"} +{"event": "line", "data": "Train: 3 [ 15/16 (100%)] Loss: 7.040 (6.96) Time: 0.366s, 699.10/s (0.443s, 577.30/s) LR: 6.000e-02 Data: 0.000 (0.078)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 629.485714793799, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.986 (0.986) Loss: 6.9012 (6.9012) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.232) Loss: 6.4220 (6.8221) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.2839)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0/20240401-183105-resnet152-224/checkpoint-3.pth.tar', 0.26647286821705424)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.8532064572616, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29656.375, 81920.0], "load": 0.9, "temperature": 73, "power": 310.414}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29656.375, 81920.0], "load": 0, "temperature": 68, "power": 84.107}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870950698852539}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 0/16 ( 0%)] Loss: 6.871 (6.87) Time: 1.451s, 176.38/s (1.451s, 176.38/s) LR: 8.000e-02 Data: 1.086 (1.086)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.936266899108887}, "pipe": "data"} +{"event": "data", "data": {"rate": 655.6390243033418, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"rate": 697.9956519542365, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9650774002075195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30140.375, 81920.0], "load": 0.91, "temperature": 72, "power": 299.15}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 593.7529927919558, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.040359020233154}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.241641759946, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.062357425689697}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30140.375, 81920.0], "load": 0.93, "temperature": 73, "power": 308.785}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.7973190134329, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.044069290161133}, "pipe": "data"} +{"event": "line", "data": "Train: 4 [ 15/16 (100%)] Loss: 7.044 (7.00) Time: 0.367s, 698.33/s (0.446s, 573.57/s) LR: 8.000e-02 Data: 0.001 (0.080)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.029 (1.029) Loss: 6.8421 (6.8421) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.239) Loss: 6.5166 (6.8615) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.1386)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.5217801594545, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30624.375, 81920.0], "load": 0.89, "temperature": 73, "power": 328.761}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [30624.375, 81920.0], "load": 0, "temperature": 68, "power": 83.913}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.846671104431152}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 0/16 ( 0%)] Loss: 6.847 (6.85) Time: 1.463s, 174.93/s (1.463s, 174.93/s) LR: 9.993e-02 Data: 1.098 (1.098)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 620.6495045667755, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.964328765869141}, "pipe": "data"} +{"event": "data", "data": {"rate": 698.0975203676973, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.083396911621094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31108.375, 81920.0], "load": 0.99, "temperature": 73, "power": 285.139}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 594.1276623761315, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.120195388793945}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.8416985015643, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.099678993225098}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.1644449877804, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31108.375, 81920.0], "load": 0.99, "temperature": 73, "power": 258.677}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.096619606018066}, "pipe": "data"} +{"event": "line", "data": "Train: 5 [ 15/16 (100%)] Loss: 7.097 (7.04) Time: 0.366s, 698.89/s (0.447s, 573.16/s) LR: 9.993e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.030 (1.030) Loss: 6.9648 (6.9648) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.237) Loss: 6.4128 (6.8967) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 1.1870)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.0442373188303, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31592.375, 81920.0], "load": 0.95, "temperature": 72, "power": 273.275}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31592.375, 81920.0], "load": 0, "temperature": 68, "power": 83.913}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902570724487305}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 0/16 ( 0%)] Loss: 6.903 (6.90) Time: 1.473s, 173.74/s (1.473s, 173.74/s) LR: 9.990e-02 Data: 1.107 (1.107)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 7.02291202545166}, "pipe": "data"} +{"event": "data", "data": {"rate": 617.9929949576688, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0034403800964355}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32076.375, 81920.0], "load": 0.93, "temperature": 73, "power": 268.697}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.7224320335322, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.024184226989746}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.1224788243375, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.103824615478516}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.7195406607133, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32076.375, 81920.0], "load": 0.94, "temperature": 72, "power": 270.225}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.143259048461914}, "pipe": "data"} +{"event": "line", "data": "Train: 6 [ 15/16 (100%)] Loss: 7.143 (7.04) Time: 0.368s, 695.27/s (0.448s, 571.84/s) LR: 9.990e-02 Data: 0.000 (0.081)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 627.1558246697895, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.035 (1.035) Loss: 6.8322 (6.8322) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.240) Loss: 6.5028 (6.8828) Acc@1: 0.0000 ( 0.2665) Acc@5: 0.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 695.3253596571781, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32560.375, 81920.0], "load": 0.75, "temperature": 73, "power": 83.717}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [32560.375, 81920.0], "load": 0, "temperature": 68, "power": 84.01}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.868475914001465}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 0/16 ( 0%)] Loss: 6.868 (6.87) Time: 1.449s, 176.64/s (1.449s, 176.64/s) LR: 9.987e-02 Data: 1.084 (1.084)\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 6.928178787231445}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.9068940669662, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.986416816711426}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33044.375, 81920.0], "load": 0.98, "temperature": 73, "power": 311.777}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 609.5038177985857, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.030221939086914}, "pipe": "data"} +{"event": "data", "data": {"rate": 615.7622389786576, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.082182884216309}, "pipe": "data"} +{"event": "data", "data": {"rate": 621.4151491561057, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33044.375, 81920.0], "load": 0.93, "temperature": 73, "power": 244.532}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0538482666015625}, "pipe": "data"} +{"event": "line", "data": "Train: 7 [ 15/16 (100%)] Loss: 7.054 (7.00) Time: 0.368s, 695.99/s (0.446s, 574.01/s) LR: 9.987e-02 Data: 0.000 (0.080)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 629.0021603586987, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.033 (1.033) Loss: 6.8201 (6.8201) Acc@1: 0.0000 ( 0.0000) Acc@5: 1.5625 ( 1.5625)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.019 (0.241) Loss: 6.5327 (6.8447) Acc@1: 0.0000 ( 0.2422) Acc@5: 0.0000 ( 0.8236)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 695.9043200932834, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0.88, "temperature": 70, "power": 83.815}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33528.375, 81920.0], "load": 0, "temperature": 68, "power": 83.913}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.882312774658203}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 0/16 ( 0%)] Loss: 6.882 (6.88) Time: 1.445s, 177.17/s (1.445s, 177.17/s) LR: 9.982e-02 Data: 1.080 (1.080)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 558.4193135113178, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.917478084564209}, "pipe": "data"} +{"event": "data", "data": {"rate": 698.8004338069381, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34012.375, 81920.0], "load": 0.99, "temperature": 72, "power": 250.301}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93463659286499}, "pipe": "data"} +{"event": "data", "data": {"rate": 598.87690461863, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9897003173828125}, "pipe": "data"} +{"event": "data", "data": {"rate": 619.1093029558153, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934311389923096}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34012.375, 81920.0], "load": 1.0, "temperature": 73, "power": 307.245}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 620.2890325105864, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.030545234680176}, "pipe": "data"} +{"event": "line", "data": "Train: 8 [ 15/16 (100%)] Loss: 7.031 (6.95) Time: 0.368s, 696.33/s (0.445s, 574.65/s) LR: 9.982e-02 Data: 0.000 (0.079)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 628.5003762765465, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "line", "data": "Test: [ 0/16] Time: 1.034 (1.034) Loss: 6.8057 (6.8057) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.0000 ( 0.0000)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.017 (0.240) Loss: 6.6677 (6.8198) Acc@1: 0.0000 ( 0.2907) Acc@5: 0.0000 ( 1.0174)\n", "pipe": "stderr"} +{"event": "line", "data": "Current checkpoints:\n", "pipe": "stderr"} +{"event": "line", "data": " ('/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0/20240401-183105-resnet152-224/checkpoint-8.pth.tar', 0.29069767441860467)\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 696.1201009266464, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34496.375, 81920.0], "load": 0, "temperature": 69, "power": 83.718}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34496.375, 81920.0], "load": 0.03, "temperature": 68, "power": 83.913}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9012908935546875}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 0/16 ( 0%)] Loss: 6.901 (6.90) Time: 1.477s, 173.32/s (1.477s, 173.32/s) LR: 9.978e-02 Data: 1.111 (1.111)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 699.5287423330592, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.855483055114746}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34980.375, 81920.0], "load": 0.99, "temperature": 72, "power": 261.648}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 599.2509777104491, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904693603515625}, "pipe": "data"} +{"event": "data", "data": {"rate": 618.8207619577267, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908544540405273}, "pipe": "data"} +{"event": "data", "data": {"rate": 622.3811933622579, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.027878761291504}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [34980.375, 81920.0], "load": 0.99, "temperature": 72, "power": 252.822}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 624.9997690837442, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98478889465332}, "pipe": "data"} +{"event": "line", "data": "Train: 9 [ 15/16 (100%)] Loss: 6.985 (6.92) Time: 0.367s, 698.06/s (0.447s, 572.63/s) LR: 9.978e-02 Data: 0.001 (0.081)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 0/16] Time: 0.990 (0.990) Loss: 6.7980 (6.7980) Acc@1: 0.0000 ( 0.0000) Acc@5: 0.3906 ( 0.3906)\n", "pipe": "stderr"} +{"event": "line", "data": "Test: [ 16/16] Time: 0.018 (0.233) Loss: 6.4200 (6.8048) Acc@1: 0.0000 ( 0.2907) Acc@5: 3.1250 ( 1.2355)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 698.324900510047, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35464.375, 81920.0], "load": 0, "temperature": 69, "power": 83.912}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35464.375, 81920.0], "load": 0.03, "temperature": 68, "power": 84.108}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.840345859527588}, "pipe": "data"} +{"event": "line", "data": "Train: 10 [ 0/16 ( 0%)] Loss: 6.840 (6.84) Time: 1.447s, 176.86/s (1.447s, 176.86/s) LR: 9.973e-02 Data: 1.082 (1.082)\n", "pipe": "stderr"} +{"event": "data", "data": {"rate": 638.9170507732193, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884131908416748}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35948.375, 81920.0], "load": 0.94, "temperature": 73, "power": 305.378}}}, "pipe": "data"} +{"event": "data", "data": {"rate": 699.2583601187602, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8733415603637695}, "pipe": "data"} +{"event": "data", "data": {"rate": 596.5048400064854, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89456844329834}, "pipe": "data"} +{"event": "data", "data": {"rate": 623.2482342403699, "units": "items/s", "task": "train"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "stop", "data": null, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/voirconf-resnet152.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/timm/pytorch-image-models/train.py", "--amp", "--batch-size", "256", "--model", "resnet152", "--data-dir", "/Users/satyaortiz-gagne/travail/mila/milabench/data", "--dataset", "FakeImageNet", "--output", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/timm/jofevozu.2024-04-01_14:01:06.047754/resnet152.D0", "--checkpoint-hist", "1"], "time": 1711996394.0321772, "return_code": -15}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet50.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet50.D0.data new file mode 100644 index 000000000..98560d896 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/resnet50.D0.data @@ -0,0 +1,2189 @@ +{"event": "config", "data": {"argv": {"--batch-size": 64, "--epochs": 50, "--lr": 0.01, "--model": "resnet50", "--no-stdout": true, "--precision": "tf32-fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "torchvision", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "resnet50", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["resnet50", "D0"], "tags": ["classification", "convnet", "resnet", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0.1, "memory": 0.010771942138671876}, "temperature": 68, "power": 87.008, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996531.953781, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "64", "--epochs", "50", "--lr", "0.01", "--model", "resnet50", "--no-stdout", "--precision", "tf32-fp16"], "time": 1711996531.9704897}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [0, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0189208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.141357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1485595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07318115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0589599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13543701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06268310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1090087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0943603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.185791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11456298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02716064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.13958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04656982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99810791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03021240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0460205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.72, "temperature": 68, "power": 308.325}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06439208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01129150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03814697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12310791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9730224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06427001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.12481689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92852783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0045166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0550537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06060791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.995849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.084716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04132080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97540283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0076904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0970458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [1, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90789794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88079833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90875244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.75, "temperature": 67, "power": 231.251}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9688720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84259033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95147705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.812744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.760498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9683837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0108642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94647216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90313720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06951904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92645263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1077.2318461171456, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9364013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92181396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00555419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96148681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94342041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.001708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9281005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9161376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05157470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1064.162314025177, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95526123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0093994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.929931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9273681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 69, "power": 169.526}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14642333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92083740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9735107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01495361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0101318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [2, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1088.9669329425692, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72662353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79376220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8753662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86322021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7818603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01275634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.81646728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 704.0376448928723, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88751220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83465576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99383544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.907958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92449951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.938232421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8594970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97406005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95294189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02996826171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1051.905815837688, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98907470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86309814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.954345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.76, "temperature": 69, "power": 165.849}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9886474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9158935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.961669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9371337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1062.1237519959275, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96405029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00738525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96612548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94903564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0523681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95806884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.049560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9737548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95794677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1076.190974729539, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9923095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [3, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85980224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.786376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79498291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 701.9551487293245, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84417724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83636474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87982177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78558349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.8, "temperature": 69, "power": 152.786}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83172607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8668212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84478759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1059.0450939444495, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94085693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03179931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91180419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88433837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0032958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.870849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9293212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97796630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93719482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00421142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91351318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9639892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1057.2773133588382, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0362548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99237060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0242919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92755126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.987060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9608154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9676513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92315673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85491943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8707275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1075.023461670698, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00640869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98382568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05633544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92144775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [4, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.43, "temperature": 67, "power": 84.302}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75518798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7857666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8743896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 757.203746279259, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75701904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76666259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8651123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90704345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87725830078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8424072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98822021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93060302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84649658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78570556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8453369140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.674377886362, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88116455078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.052490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7698974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0291748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.936279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94866943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9876708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91876220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1049.1121291309992, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0609130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0474853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9068603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87408447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.077392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91717529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.73, "temperature": 70, "power": 296.992}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93695068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05889892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90765380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03533935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1055.3566413063818, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.989990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.022705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91595458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04693603515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00433349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.016357421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10699462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [5, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.776611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1108.719027352904, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7945556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83807373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.908935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78363037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9781494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8026123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.968505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.900634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 819.9941945094358, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87750244140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.965087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8619384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01373291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.872802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 69, "power": 157.713}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99176025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9207763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1059.7984265876532, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08831787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88812255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01202392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86712646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9315185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9251708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.035888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9388427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91827392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1057.6931184046605, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90899658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9385986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97064208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85308837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99151611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9407958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95916748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8702392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.14739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02166748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [6, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1101.1780809599672, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.766845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7198486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8890380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0, "temperature": 68, "power": 300.562}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88006591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8902587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94049072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85162353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 749.359561553808, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88653564453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8624267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93023681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8919677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9014892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98834228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.906982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9183349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.992431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87017822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07659912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91363525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95977783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.8298251687738, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99517822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8734130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96173095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9034423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88519287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05010986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.087646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0235595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88616943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1060.9125557140471, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93988037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92608642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.975341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.07244873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8929443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8892822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.72, "temperature": 70, "power": 222.364}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.1773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0374755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01177978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01385498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1090.0872991604326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95745849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [7, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88031005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7603759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7989501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9241943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 704.59839161423, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7813720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98284912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.76763916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8951416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8736572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848388671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85198974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9759521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04522705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8397216796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9031982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96685791015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1055.535330498279, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92254638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.888427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87628173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.76, "temperature": 70, "power": 271.99}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00665283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8714599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9698486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96661376953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95611572265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.1852364995298, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99481201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95672607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9947509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01983642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97576904296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.953857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9620361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.881103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10626220703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02752685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02606201171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1068.2704107627962, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.869873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8306884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01446533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [8, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7694091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90570068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9017333984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 762.1897519119517, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7899169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95062255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.838623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0047607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.76, "temperature": 70, "power": 263.673}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8917236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9434814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9041748046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7479248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9053955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84368896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97747802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1066.0155986848833, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87054443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93328857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95281982421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87518310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95941162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90655517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9464111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91143798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93121337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0428466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97503662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96258544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1055.6110117295611, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0184326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85687255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89337158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87615966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99981689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91278076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92169189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9564208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0125732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79156494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.011474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89605712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96893310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95782470703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1076.5255616393654, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9952392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92376708984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.09295654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8553466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04425048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.976806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.81, "temperature": 70, "power": 300.441}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9403076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [9, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92926025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1099.0785951876207, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.6580810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8775634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86627197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77252197265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.763671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.939697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94830322265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96368408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7374267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80682373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.979248046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90826416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92901611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86505126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 849.874246498017, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9329833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87249755859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9117431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.937255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03314208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.873779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0120849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88226318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.932373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.996337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1049.3810746834765, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.944091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92803955078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8800048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97467041015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88360595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.73, "temperature": 71, "power": 251.928}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85040283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87774658203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97418212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96795654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1061.2029136488634, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.832275390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.08526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98223876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.904541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01593017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97027587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95758056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.952880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9888916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [10, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7283935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1096.7139672260666, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.839111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8455810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.73211669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75860595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91790771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87811279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87445068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86419677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8148193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 764.7726197763675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84576416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82525634765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90692138671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88763427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98175048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9814453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92071533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.73, "temperature": 70, "power": 195.045}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00933837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85479736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98846435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96307373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02642822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9974365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83905029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1083.852159564918, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9456787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00018310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91265869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84893798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86065673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92889404296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91302490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99627685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85137939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99835205078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88055419921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85882568359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8804931640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1070.2626000053422, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0657958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.10986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89898681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0123291015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9727783203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925048828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.923583984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86016845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92242431640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.918212890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87127685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1089.9248880066616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [11, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75531005859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0, "temperature": 68, "power": 82.232}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7322998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83526611328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84112548828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95172119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.825439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 721.49160272082, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7950439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9066162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9088134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.931396484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83197021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00299072265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85174560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9654541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9080810546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8575439453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78045654296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8883056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88189697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.8563458159915, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91107177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.902099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89544677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.930908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87200927734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82464599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02545166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00091552734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94940185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8238525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96783447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98394775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90972900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9095458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1060.9011078721937, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88482666015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9969482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05096435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86676025390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95098876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9444580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.78, "temperature": 70, "power": 293.181}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8795166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.043701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99859619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9552001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02886962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97113037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.6639447402797, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9190673828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90521240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91668701171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02362060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [12, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82745361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82025146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79949951171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83087158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 747.0697897586159, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.925537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82574462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.767822265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86492919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94927978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8480224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87884521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.030029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86968994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88458251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9100341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88140869140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1071.2100096344411, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.877685546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94097900390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9739990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99249267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9276123046875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.71, "temperature": 70, "power": 302.482}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91339111328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8748779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8927001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90081787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.958984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84796142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06488037109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99530029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1067.9432609126848, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.909423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91583251953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.897705078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95391845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9476318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9757080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87677001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.945068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0169677734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00152587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94317626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9803466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97845458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1069.808790922644, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9322509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.96636962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89862060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.971435546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9573974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0126953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89569091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.06988525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [13, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89239501953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1099.097617328615, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8389892578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.819091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.849609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93353271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84130859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94427490234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.72, "temperature": 71, "power": 315.143}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79571533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8326416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82928466796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87469482421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89910888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 832.0028485177525, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01068115234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86041259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.080078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.851318359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98089599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82415771484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0321044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83056640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8673095703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.962158203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.894775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80596923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.934326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9671630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88690185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1059.6592821605248, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9774169921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01519775390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88262939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93914794921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.898193359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9637451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95635986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.90606689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.985107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92730712890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9393310546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87530517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99542236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9061279296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1065.3600819059798, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8958740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95416259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89166259765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86102294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89556884765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0406494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.75, "temperature": 72, "power": 196.835}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0345458984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93426513671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.036865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95068359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.99908447265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.879638671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [14, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80560302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1086.7169831184096, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8131103515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79180908203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.884033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83612060546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84588623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88916015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.921630859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00128173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97076416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.94549560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77191162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8509521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 773.0099750475399, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.7977294921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04913330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9267578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.922119140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8463134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.11358642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8544921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8599853515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9713134765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.028076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9468994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01458740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1063.6583343133557, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83660888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88018798828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8387451171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0703125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.72, "temperature": 71, "power": 191.005}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.04052734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9302978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98956298828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9554443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97637939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93865966796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87896728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.86175537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9219970703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1059.457113978896, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95196533203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97454833984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.097412109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.03302001953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93475341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9525146484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87872314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8885498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79437255859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93511962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.98779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91729736328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9586181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87786865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.827880859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02691650390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1082.6974761183408, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "progress": [15, 50]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.78076171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.77789306640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.74603271484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89959716796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.72613525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.79461669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 710.3025088734263, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89373779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91949462890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.822998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.75787353515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.83685302734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89617919921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.80035400390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.72, "temperature": 71, "power": 182.107}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9439697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93365478515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85809326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8887939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01885986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85614013671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.02154541015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.95733642578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.848876953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.85498046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1054.1756239459141, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.84747314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9488525390625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.91619873046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8489990234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.00286865234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.89837646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.82684326171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.88427734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.01019287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.92095947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.93646240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8726806640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.87744140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.0084228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.8590087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 64]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.97686767578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 1057.0730304183433, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [4746.375, 81920.0], "load": 0.74, "temperature": 72, "power": 276.682}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/torchvision/voirconf-resnet50.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/torchvision/main.py", "--batch-size", "64", "--epochs", "50", "--lr", "0.01", "--model", "resnet50", "--no-stdout", "--precision", "tf32-fp16"], "time": 1711996602.500725, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/rwkv.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/rwkv.D0.data new file mode 100644 index 000000000..eeabfca4f --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/rwkv.D0.data @@ -0,0 +1,479 @@ +{"event": "config", "data": {"argv": {"--accelerator": "gpu", "--adam_eps": "1e-8", "--beta1": 0.9, "--beta2": 0.99, "--ctx_len": 128, "--data_type": "dummy", "--devices": 1, "--enable_progress_bar": "False", "--epoch_begin": 0, "--epoch_count": 20, "--epoch_save": 0, "--epoch_steps": 1000, "--grad_cp": 0, "--head_qk": 0, "--lr_final": "1e-5", "--lr_init": "6e-4", "--micro_bsz": 16, "--n_embd": 768, "--n_layer": 12, "--pre_ffn": 0, "--precision": "tf32", "--random_seed": 1234, "--strategy": "ddp_find_unused_parameters_false", "--warmup_steps": 0}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "rwkv", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "rwkv", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["rwkv", "D0"], "tags": ["llm", "rnn", "unsupported-rocm"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 68, "power": 86.594, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996605.163824, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--accelerator", "gpu", "--adam_eps", "1e-8", "--beta1", "0.9", "--beta2", "0.99", "--ctx_len", "128", "--data_type", "dummy", "--devices", "1", "--enable_progress_bar", "False", "--epoch_begin", "0", "--epoch_count", "20", "--epoch_save", "0", "--epoch_steps", "1000", "--grad_cp", "0", "--head_qk", "0", "--lr_final", "1e-5", "--lr_init", "6e-4", "--micro_bsz", "16", "--n_embd", "768", "--n_layer", "12", "--pre_ffn", "0", "--precision", "tf32", "--random_seed", "1234", "--strategy", "ddp_find_unused_parameters_false", "--warmup_steps", "0"], "time": 1711996605.1794744}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "[2024-04-01 18:36:47,183] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n", "pipe": "stdout"} +{"event": "line", "data": "########## work in progress ##########\n", "pipe": "stderr"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "########## WARNING: GLOBAL SEED 1234 THIS WILL AFFECT MULTIGPU SAMPLING ##########\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# RWKV-4 TF32 on 1x1 GPU, bsz 1x1x16=16, ddp_find_unused_parameters_false \n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Data = (dummy), ProjDir = /Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Epoch = 0 to 19 (will continue afterwards), save every 0 epoch\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Each \"epoch\" = 1000 steps, 16000 samples, 2048000 tokens\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Model = 12 n_layer, 768 n_embd, 128 ctx_len\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Adam = lr 0.0006 to 1e-05, warmup 0 steps, beta (0.9, 0.99), eps 1e-08\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "# Found torch 2.1.0+cu118, recommend 1.13.1+cu117 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "# Found deepspeed 0.12.2, recommend 0.7.0 (faster than newer versions)\n", "pipe": "stderr"} +{"event": "line", "data": "# Found pytorch_lightning 1.9.5, recommend 1.9.1 or newer\n", "pipe": "stderr"} +{"event": "line", "data": "#\n", "pipe": "stderr"} +{"event": "line", "data": "############################################################################\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "{'load_model': '', 'wandb': '', 'proj_dir': '/Users/satyaortiz-gagne/travail/mila/milabench/proj/rwkv/', 'random_seed': 1234, 'data_file': '', 'data_type': 'dummy', 'vocab_size': 0, 'ctx_len': 128, 'epoch_steps': 1000, 'epoch_count': 20, 'epoch_begin': 0, 'epoch_save': 0, 'micro_bsz': 16, 'n_layer': 12, 'n_embd': 768, 'dim_att': 768, 'dim_ffn': 3072, 'pre_ffn': 0, 'head_qk': 0, 'tiny_att_dim': 0, 'tiny_att_layer': -999, 'lr_init': 0.0006, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.9, 'beta2': 0.99, 'adam_eps': 1e-08, 'grad_cp': 0, 'my_pile_version': 1, 'my_pile_stage': 0, 'my_pile_shift': -1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'my_img_version': 0, 'my_img_size': 0, 'my_img_bit': 0, 'my_img_clip': 'x', 'my_img_clip_scale': 1, 'my_img_l1_scale': 0, 'my_img_encoder': 'x', 'my_sample_len': 0, 'my_ffn_shift': 1, 'my_att_shift': 1, 'my_pos_emb': 0, 'load_partial': 0, 'magic_prime': 0, 'my_qa_mask': 0, 'my_testing': '', 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': False, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'ddp_find_unused_parameters_false', 'sync_batchnorm': False, 'precision': 'tf32', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2024-04-01-18-36-48', 'betas': (0.9, 0.99), 'real_bsz': 16, 'run_name': '0 ctx128 L12 D768'}\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Building dummy data...\n", "pipe": "stderr"} +{"event": "line", "data": "Building token list...\n", "pipe": "stderr"} +{"event": "line", "data": "Data has 1620950 tokens, 13 vocab size.\n", "pipe": "stderr"} +{"event": "line", "data": "RWKV_MY_TESTING \n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/wkv_128/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=wkv_128 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -res-usage --maxrregcount 60 --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DTmax=128 -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_cuda.cu -o wkv_cuda.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : 0 bytes gmem\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Compiling entry function '_Z15kernel_backwardIfEviiiPKT_S2_S2_S2_S2_S2_PS0_S3_S3_S3_' for 'sm_80'\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Function properties for _Z15kernel_backwardIfEviiiPKT_S2_S2_S2_S2_S2_PS0_S3_S3_S3_\n", "pipe": "stdout"} +{"event": "line", "data": " 1024 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Used 48 registers, 448 bytes cmem[0], 16 bytes cmem[2]\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Compiling entry function '_Z14kernel_forwardIfEviiiPKT_S2_S2_S2_PS0_' for 'sm_80'\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Function properties for _Z14kernel_forwardIfEviiiPKT_S2_S2_S2_PS0_\n", "pipe": "stdout"} +{"event": "line", "data": " 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads\n", "pipe": "stdout"} +{"event": "line", "data": "ptxas info : Used 40 registers, 408 bytes cmem[0]\n", "pipe": "stdout"} +{"event": "line", "data": "[2/3] c++ -MMD -MF wkv_op.o.d -DTORCH_EXTENSION_NAME=wkv_128 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -c /mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/cuda/wkv_op.cpp -o wkv_op.o \n", "pipe": "stdout"} +{"event": "line", "data": "[3/3] c++ wkv_op.o wkv_cuda.cuda.o -shared -L/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda -ltorch -ltorch_python -L/usr/lib64 -lcudart -o wkv_128.so\n", "pipe": "stdout"} +{"event": "line", "data": "Loading extension module wkv_128...\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "# Init model weight (slow for large models)...\n", "pipe": "stdout"} +{"event": "line", "data": "#\n", "pipe": "stdout"} +{"event": "line", "data": "############################################################################\n", "pipe": "stdout"} +{"event": "line", "data": "\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 -0.0006 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 1.0 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 1.0 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 0 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 0 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 0.5 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "GPU available: True (cuda), used: True\n", "pipe": "stderr"} +{"event": "line", "data": "TPU available: False, using: 0 TPU cores\n", "pipe": "stderr"} +{"event": "line", "data": "IPU available: False, using: 0 IPUs\n", "pipe": "stderr"} +{"event": "line", "data": "HPU available: False, using: 0 HPUs\n", "pipe": "stderr"} +{"event": "line", "data": "13 768 emb.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ln0.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.0.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.0.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.0.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.0.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.1.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.1.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.1.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.1.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.2.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.2.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.2.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.2.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.3.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.3.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.3.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.3.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.4.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.4.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.4.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.4.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.5.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.5.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.5.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.5.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.6.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.6.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.6.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.6.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.7.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.7.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.7.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.7.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.8.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.8.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.8.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.8.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.9.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.9.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.9.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.9.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.10.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.10.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.10.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.10.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln1.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ln2.bias\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_decay\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_first\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_v\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.att.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.att.output.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_k\n", "pipe": "stdout"} +{"event": "line", "data": "768 blocks.11.ffn.time_mix_r\n", "pipe": "stdout"} +{"event": "line", "data": "3072 768 blocks.11.ffn.key.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 768 blocks.11.ffn.receptance.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 3072 blocks.11.ffn.value.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.weight\n", "pipe": "stdout"} +{"event": "line", "data": "768 ln_out.bias\n", "pipe": "stdout"} +{"event": "line", "data": "13 768 head.weight\n", "pipe": "stdout"} +{"event": "line", "data": "[rank: 0] Global seed set to 1234\n", "pipe": "stderr"} +{"event": "line", "data": "Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/1\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "distributed_backend=nccl\n", "pipe": "stderr"} +{"event": "line", "data": "All distributed processes registered. Starting with 1 processes\n", "pipe": "stderr"} +{"event": "line", "data": "----------------------------------------------------------------------------------------------------\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]\n", "pipe": "stderr"} +{"event": "line", "data": "Installed CUDA version 11.5 does not match the version torch was compiled with 11.8 but since the APIs are compatible, accepting this combination\n", "pipe": "stdout"} +{"event": "line", "data": "Using /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118 as PyTorch extensions root...\n", "pipe": "stderr"} +{"event": "line", "data": "Detected CUDA files, patching ldflags\n", "pipe": "stderr"} +{"event": "line", "data": "Emitting ninja build file /mnt/Users/satyaortiz-gagne/travail/mila/milabench/cache/torch_extensions/py310_cu118/fused_adam/build.ninja...\n", "pipe": "stderr"} +{"event": "line", "data": "Building extension module fused_adam...\n", "pipe": "stderr"} +{"event": "line", "data": "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "pipe": "stderr"} +{"event": "line", "data": "[1/3] /usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "FAILED: multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/bin/nvcc -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_80,code=sm_80 --compiler-options '-fPIC' -O3 -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -lineinfo --use_fast_math -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_80,code=compute_80 -DBF16_AVAILABLE -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -std=c++17 -c /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/multi_tensor_adam.cu -o multi_tensor_adam.cuda.o \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"} +{"event": "line", "data": " 435 | function(_Functor&& __f)\n", "pipe": "stdout"} +{"event": "line", "data": " | ^ \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:435:145: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with \u2018...\u2019:\n", "pipe": "stdout"} +{"event": "line", "data": " 530 | operator=(_Functor&& __f)\n", "pipe": "stdout"} +{"event": "line", "data": " | ^ \n", "pipe": "stdout"} +{"event": "line", "data": "/usr/include/c++/11/bits/std_function.h:530:146: note: \u2018_ArgTypes\u2019\n", "pipe": "stdout"} +{"event": "line", "data": "[2/3] c++ -MMD -MF fused_adam_frontend.o.d -DTORCH_EXTENSION_NAME=fused_adam -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\\\"_gcc\\\" -DPYBIND11_STDLIB=\\\"_libstdcpp\\\" -DPYBIND11_BUILD_ABI=\\\"_cxxabi1011\\\" -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/includes -I/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/torch/csrc/api/include -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/TH -isystem /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/include/THC -isystem /home/ubuntu/miniconda3/include/python3.10 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -O3 -std=c++17 -g -Wno-reorder -DVERSION_GE_1_1 -DVERSION_GE_1_3 -DVERSION_GE_1_5 -DBF16_AVAILABLE -c /Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/csrc/adam/fused_adam_frontend.cpp -o fused_adam_frontend.o \n", "pipe": "stdout"} +{"event": "line", "data": "ninja: build stopped: subcommand failed.\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1574.375, 81920.0], "load": 0, "temperature": 56, "power": 72.316}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 64, "power": 52.388}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 63, "power": 51.9}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 62, "power": 51.704}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 61, "power": 51.19}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [887.5625, 81920.0], "load": 0, "temperature": 60, "power": 50.714}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 60, "power": 75.948}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 59, "power": 75.252}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 58, "power": 74.215}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 58, "power": 74.265}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 57, "power": 73.277}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 57, "power": 72.921}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 56, "power": 71.407}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2680.375, 81920.0], "load": 0, "temperature": 56, "power": 71.848}}}, "pipe": "data"} +{"event": "error", "data": {"type": "RuntimeError", "message": "Error building extension 'fused_adam'"}, "pipe": "data"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2100, in _run_ninja_build\n", "pipe": "stderr"} +{"event": "line", "data": " subprocess.run(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/home/ubuntu/miniconda3/lib/python3.10/subprocess.py\", line 526, in run\n", "pipe": "stderr"} +{"event": "line", "data": " raise CalledProcessError(retcode, process.args,\n", "pipe": "stderr"} +{"event": "line", "data": "subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "The above exception was the direct cause of the following exception:\n", "pipe": "stderr"} +{"event": "line", "data": "\n", "pipe": "stderr"} +{"event": "line", "data": "Traceback (most recent call last):\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/bin/voir\", line 8, in \n", "pipe": "stderr"} +{"event": "line", "data": " sys.exit(main())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/cli.py\", line 124, in main\n", "pipe": "stderr"} +{"event": "line", "data": " ov(sys.argv[1:] if argv is None else argv)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/phase.py\", line 334, in __call__\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/overseer.py\", line 242, in _run\n", "pipe": "stderr"} +{"event": "line", "data": " set_value(func())\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/voir/scriptutils.py\", line 37, in \n", "pipe": "stderr"} +{"event": "line", "data": " return lambda: exec(mainsection, glb, glb)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py\", line 420, in \n", "pipe": "stderr"} +{"event": "line", "data": " trainer.fit(model, data_loader)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 608, in fit\n", "pipe": "stderr"} +{"event": "line", "data": " call._call_and_handle_interrupt(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py\", line 36, in _call_and_handle_interrupt\n", "pipe": "stderr"} +{"event": "line", "data": " return trainer.strategy.launcher.launch(trainer_fn, *args, trainer=trainer, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/subprocess_script.py\", line 88, in launch\n", "pipe": "stderr"} +{"event": "line", "data": " return function(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 650, in _fit_impl\n", "pipe": "stderr"} +{"event": "line", "data": " self._run(model, ckpt_path=self.ckpt_path)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1093, in _run\n", "pipe": "stderr"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "line", "data": " self.strategy.setup(self)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/ddp.py\", line 181, in setup\n", "pipe": "stderr"} +{"event": "line", "data": " self.setup_optimizers(trainer)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/strategies/strategy.py\", line 142, in setup_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " self.optimizers, self.lr_scheduler_configs, self.optimizer_frequencies = _init_optimizers_and_lr_schedulers(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/core/optimizer.py\", line 180, in _init_optimizers_and_lr_schedulers\n", "pipe": "stderr"} +{"event": "line", "data": " optim_conf = model.trainer._call_lightning_module_hook(\"configure_optimizers\", pl_module=model)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py\", line 1356, in _call_lightning_module_hook\n", "pipe": "stderr"} +{"event": "line", "data": " output = fn(*args, **kwargs)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/mnt/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/src/model.py\", line 606, in configure_optimizers\n", "pipe": "stderr"} +{"event": "line", "data": " return FusedAdam(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py\", line 94, in __init__\n", "pipe": "stderr"} +{"event": "line", "data": " fused_adam_cuda = FusedAdamBuilder().load()\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 452, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return self.jit_load(verbose)\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py\", line 501, in jit_load\n", "pipe": "stderr"} +{"event": "line", "data": " op_module = load(name=self.name,\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1308, in load\n", "pipe": "stderr"} +{"event": "line", "data": " return _jit_compile(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1710, in _jit_compile\n", "pipe": "stderr"} +{"event": "line", "data": " _write_ninja_file_and_build_library(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 1823, in _write_ninja_file_and_build_library\n", "pipe": "stderr"} +{"event": "line", "data": " _run_ninja_build(\n", "pipe": "stderr"} +{"event": "line", "data": " File \"/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/utils/cpp_extension.py\", line 2116, in _run_ninja_build\n", "pipe": "stderr"} +{"event": "line", "data": " raise RuntimeError(message) from e\n", "pipe": "stderr"} +{"event": "line", "data": "RuntimeError: Error building extension 'fused_adam'\n", "pipe": "stderr"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/rwkv/voirconf-rwkv.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/rwkv/rwkv-v4neo/train.py", "--accelerator", "gpu", "--adam_eps", "1e-8", "--beta1", "0.9", "--beta2", "0.99", "--ctx_len", "128", "--data_type", "dummy", "--devices", "1", "--enable_progress_bar", "False", "--epoch_begin", "0", "--epoch_count", "20", "--epoch_save", "0", "--epoch_steps", "1000", "--grad_cp", "0", "--head_qk", "0", "--lr_final", "1e-5", "--lr_init", "6e-4", "--micro_bsz", "16", "--n_embd", "768", "--n_layer", "12", "--pre_ffn", "0", "--precision", "tf32", "--random_seed", "1234", "--strategy", "ddp_find_unused_parameters_false", "--warmup_steps", "0"], "time": 1711996647.6078098, "return_code": 1}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/stargan.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/stargan.D0.data new file mode 100644 index 000000000..0a2ed9a73 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/stargan.D0.data @@ -0,0 +1,680 @@ +{"event": "config", "data": {"argv": {"--batch_size": 16, "--c_dim": 5, "--image_size": 512}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "stargan", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "stargan", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["stargan", "D0"], "tags": ["gan", "resnet", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 55, "power": 71.945, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996650.339408, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--batch_size", "16", "--c_dim", "5", "--image_size", "512"], "time": 1711996650.3561845}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "Namespace(c_dim=5, c2_dim=8, celeba_crop_size=178, rafd_crop_size=256, image_size=512, g_conv_dim=64, d_conv_dim=64, g_repeat_num=6, d_repeat_num=6, lambda_cls=1, lambda_rec=10, lambda_gp=10, dataset='synth', batch_size=16, num_iters=200000, num_iters_decay=100000, g_lr=0.0001, d_lr=0.0001, n_critic=5, beta1=0.5, beta2=0.999, resume_iters=None, selected_attrs=['Black_Hair', 'Blond_Hair', 'Brown_Hair', 'Male', 'Young'], test_iters=200000, num_workers=1, mode='train', use_tensorboard=False, celeba_image_dir='data/celeba/images', attr_path='data/celeba/list_attr_celeba.txt', rafd_image_dir='data/RaFD/train', log_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/logs', model_save_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/models', sample_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/samples', result_dir='/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/results', log_step=10, sample_step=1000, model_save_step=10000, lr_update_step=1000)\n", "pipe": "stdout"} +{"event": "line", "data": "Generator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(8, 64, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (5): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (7): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (9): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (10): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (11): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (12): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (13): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (14): ResidualBlock(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (1): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): InstanceNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (15): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (16): InstanceNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (17): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (18): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (19): InstanceNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (20): ReLU(inplace=True)\n", "pipe": "stdout"} +{"event": "line", "data": " (21): Conv2d(64, 3, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (22): Tanh()\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "G\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 8430528\n", "pipe": "stdout"} +{"event": "line", "data": "Discriminator(\n", "pipe": "stdout"} +{"event": "line", "data": " (main): Sequential(\n", "pipe": "stdout"} +{"event": "line", "data": " (0): Conv2d(3, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (1): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (2): Conv2d(64, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (3): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (4): Conv2d(128, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (5): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (6): Conv2d(256, 512, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (7): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (8): Conv2d(512, 1024, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (9): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " (10): Conv2d(1024, 2048, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))\n", "pipe": "stdout"} +{"event": "line", "data": " (11): LeakyReLU(negative_slope=0.01)\n", "pipe": "stdout"} +{"event": "line", "data": " )\n", "pipe": "stdout"} +{"event": "line", "data": " (conv1): Conv2d(2048, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": " (conv2): Conv2d(2048, 5, kernel_size=(8, 8), stride=(1, 1), bias=False)\n", "pipe": "stdout"} +{"event": "line", "data": ")\n", "pipe": "stdout"} +{"event": "line", "data": "D\n", "pipe": "stdout"} +{"event": "line", "data": "The number of parameters: 45376448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "line", "data": "Start training...\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(warning.format(ret))\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 13.064104080200195}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [53428.375, 81920.0], "load": 1.0, "temperature": 55, "power": 74.891}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [19484.375, 81920.0], "load": 1.0, "temperature": 56, "power": 111.842}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 12.269195556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.614904403686523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.643402099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 8.18210603853625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.947258472442627}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 67.10989930952344, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [29784.375, 81920.0], "load": 0.99, "temperature": 61, "power": 383.814}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.145265579223633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.017976760864258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 6.545051929113896, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [31678.375, 81920.0], "load": 1.0, "temperature": 60, "power": 365.273}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2392847537994385}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4749865531921387}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0874757766723633}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:12], Iteration [10/200000], D/loss_real: -1.5526, D/loss_fake: -0.2841, D/loss_cls: 3.5713, D/loss_gp: 0.0353, G/loss_fake: 1.0371, G/loss_rec: 0.5529, G/loss_cls: 3.6157\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.338289997909264, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.897923469543457}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.097832679748535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.425319194793701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.420851230621338}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0333638191223145}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.506190507720035, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 1.0, "temperature": 62, "power": 293.338}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.899639129638672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.81927752494812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.40332914852883, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5159292221069336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.150247573852539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6031622886657715}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:16], Iteration [20/200000], D/loss_real: -1.3235, D/loss_fake: -0.8273, D/loss_cls: 3.2532, D/loss_gp: 0.1501, G/loss_fake: -0.0646, G/loss_rec: 0.5317, G/loss_cls: 3.3520\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 41.38395073435859, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9734550714492798}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.229659080505371}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.780806064605713}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8421356678009033}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.528245449066162}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.157669926571955, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.91, "temperature": 62, "power": 236.23}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.230276107788086}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.7836761474609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.163940236629294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.584670066833496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.404325485229492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.796459436416626}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:19], Iteration [30/200000], D/loss_real: -2.9522, D/loss_fake: 0.8799, D/loss_cls: 3.2994, D/loss_gp: 0.0569, G/loss_fake: -0.7619, G/loss_rec: 0.5167, G/loss_cls: 3.3448\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 41.96714005982439, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.382101058959961}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2163033485412598}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5934066772460938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36418.375, 81920.0], "load": 0.92, "temperature": 64, "power": 142.233}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2557684183120728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8033437132835388}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.51865387493717, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0602636337280273}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6703366041183472}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.718268363931195, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3135545253753662}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": -0.09294269979000092}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6973950862884521}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:23], Iteration [40/200000], D/loss_real: -7.3682, D/loss_fake: 2.4684, D/loss_cls: 3.3087, D/loss_gp: 0.2289, G/loss_fake: -1.1050, G/loss_rec: 0.5351, G/loss_cls: 3.4843\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 42.82031337323749, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2767248153686523}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 64, "power": 267.703}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.33988261222839355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.118654251098633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2893550395965576}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.454758644104004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.05076958748643, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8980081081390381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4738134145736694}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.980799392351923, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6294066905975342}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9489808678627014}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7503081560134888}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:26], Iteration [50/200000], D/loss_real: -6.5075, D/loss_fake: 3.0037, D/loss_cls: 3.6211, D/loss_gp: 0.0633, G/loss_fake: -2.8019, G/loss_rec: 0.5180, G/loss_cls: 3.3607\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 65, "power": 374.572}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.402381287497796, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7301626205444336}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5904934406280518}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9626620411872864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0809922218322754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.837551474571228}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.35453489120627, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2931716442108154}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0357056856155396}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.807538269708616, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8651372790336609}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8135475516319275}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 65, "power": 274.901}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1261632442474365}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:30], Iteration [60/200000], D/loss_real: -6.2381, D/loss_fake: 3.4334, D/loss_cls: 3.8030, D/loss_gp: 0.0128, G/loss_fake: -3.1178, G/loss_rec: 0.5245, G/loss_cls: 3.4663\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.744017156750346, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4488235712051392}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.138313889503479}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9662548303604126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9148251414299011}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7599883675575256}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.45172101745684, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.557898759841919}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4697370529174805}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.167171266309868, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 66, "power": 346.163}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3419642448425293}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.352891445159912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0670921802520752}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:33], Iteration [70/200000], D/loss_real: -6.1336, D/loss_fake: 3.6070, D/loss_cls: 3.2317, D/loss_gp: 0.0362, G/loss_fake: -2.9490, G/loss_rec: 0.5303, G/loss_cls: 3.3461\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 43.75262948745654, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9371880292892456}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9380738735198975}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7881181240081787}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6692737340927124}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.407157301902771}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.30233034530413, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 66, "power": 304.719}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4188573360443115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5791356563568115}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 26.91992832037416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2513959407806396}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0550239086151123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.829297661781311}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:37], Iteration [80/200000], D/loss_real: -5.6220, D/loss_fake: 4.2100, D/loss_cls: 3.1568, D/loss_gp: 0.0084, G/loss_fake: -4.1097, G/loss_rec: 0.5768, G/loss_cls: 3.3256\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 44.545616613881656, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0398800373077393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.293461799621582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3749208450317383}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4707329273223877}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.773996353149414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.378925935016895, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.95, "temperature": 66, "power": 383.173}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5215606689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4862334728240967}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.13936393892551, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2857654094696045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7168182134628296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.352587938308716}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:40], Iteration [90/200000], D/loss_real: -3.6000, D/loss_fake: 2.1493, D/loss_cls: 3.4008, D/loss_gp: 0.0403, G/loss_fake: -3.7955, G/loss_rec: 0.5678, G/loss_cls: 3.3396\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 41.603535756919804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.078941822052002}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.149059295654297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.273155212402344}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.91, "temperature": 67, "power": 148.518}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.97417688369751}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.6015634536743164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.05341759865504, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.147404670715332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.7615745067596436}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.907126854925274, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.401129961013794}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0921080112457275}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.917656660079956}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:44], Iteration [100/200000], D/loss_real: -3.4763, D/loss_fake: 3.1221, D/loss_cls: 3.2684, D/loss_gp: 0.0003, G/loss_fake: -3.2210, G/loss_rec: 0.5718, G/loss_cls: 3.3083\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.33706820606543, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.4977495670318604}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 67, "power": 290.037}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2604527473449707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.780794382095337}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.434880256652832}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.104581832885742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.06838310835604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3963165283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.610321044921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.974743282774583, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5654871463775635}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.465573310852051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.3404765129089355}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:47], Iteration [110/200000], D/loss_real: -1.5610, D/loss_fake: 0.8353, D/loss_cls: 3.3390, D/loss_gp: 0.2727, G/loss_fake: -0.5261, G/loss_rec: 0.5613, G/loss_cls: 3.4109\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 67, "power": 307.63}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.129709919884675, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.627653121948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.189291477203369}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.1299052238464355}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0223917961120605}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.651988983154297}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.33075735562764, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6748223304748535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7516798973083496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.893904878801933, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1911935806274414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.157087802886963}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.93, "temperature": 67, "power": 152.494}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.207822322845459}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:51], Iteration [120/200000], D/loss_real: -1.9528, D/loss_fake: 0.5073, D/loss_cls: 3.6261, D/loss_gp: 0.0027, G/loss_fake: -1.1106, G/loss_rec: 0.5299, G/loss_cls: 4.0362\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.75473098768726, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2220218181610107}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.688556432723999}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8925600051879883}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.79996657371521}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.017216682434082}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.34877253457034, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.578012704849243}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 68, "power": 294.272}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9697957038879395}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.01201073435308, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6117188930511475}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.276488304138184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6780238151550293}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:54], Iteration [130/200000], D/loss_real: -0.6747, D/loss_fake: -0.3483, D/loss_cls: 3.4450, D/loss_gp: 0.0256, G/loss_fake: 1.0789, G/loss_rec: 0.5269, G/loss_cls: 3.3223\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 41.8267805994244, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9097275733947754}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.2461371421813965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.753172874450684}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8317785263061523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.1198906898498535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.18354420922659, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 69, "power": 210.394}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.993666648864746}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.351744651794434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.819021430242213, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.553812026977539}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.063791275024414}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7950423955917358}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:00:58], Iteration [140/200000], D/loss_real: -2.5320, D/loss_fake: 0.4503, D/loss_cls: 3.7468, D/loss_gp: 0.0130, G/loss_fake: 0.0287, G/loss_rec: 0.5161, G/loss_cls: 3.4336\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.57661510480372, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.630168914794922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5270442962646484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9994158744812012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.376949429512024}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.122372627258301}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.91, "temperature": 68, "power": 264.68}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.005648073816324, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.09115219116211}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 17.153715133666992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.632564311450587, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.206098556518555}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.303515434265137}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.863801121711731}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:01], Iteration [150/200000], D/loss_real: -2.8905, D/loss_fake: 0.6730, D/loss_cls: 3.7476, D/loss_gp: 0.0334, G/loss_fake: -1.5627, G/loss_rec: 0.5124, G/loss_cls: 3.3072\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 41.02384207757126, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.152059555053711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.371776580810547}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.97, "temperature": 68, "power": 341.318}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.388545989990234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.298101425170898}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.407421112060547}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.93939202831225, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.2357001304626465}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.768301248550415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.947682458176548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3944292068481445}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.134510040283203}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7817540168762207}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:05], Iteration [160/200000], D/loss_real: -1.8827, D/loss_fake: 1.3493, D/loss_cls: 3.3049, D/loss_gp: 0.0010, G/loss_fake: -1.3975, G/loss_rec: 0.5182, G/loss_cls: 3.2961\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.41148785346479, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.565572738647461}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 69, "power": 307.818}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.416189193725586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.242586851119995}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.038175582885742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8489742279052734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.933557778776006, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3193347454071045}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.976303815841675}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.843172642523314, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6734554767608643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3444387912750244}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0382609367370605}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:08], Iteration [170/200000], D/loss_real: -3.3153, D/loss_fake: 1.9491, D/loss_cls: 3.2936, D/loss_gp: 0.0111, G/loss_fake: -1.6811, G/loss_rec: 0.5329, G/loss_cls: 3.3354\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.92, "temperature": 70, "power": 352.795}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 40.049517022599154, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.475222587585449}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.068521738052368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6299798488616943}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3304412364959717}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0161538124084473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.25408653862376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6660009622573853}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4667354822158813}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.92445171116659, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1055257320404053}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1622955799102783}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.96, "temperature": 68, "power": 165.263}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1238198280334473}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:12], Iteration [180/200000], D/loss_real: -2.8559, D/loss_fake: 0.5941, D/loss_cls: 3.2928, D/loss_gp: 0.0093, G/loss_fake: -0.7820, G/loss_rec: 0.5166, G/loss_cls: 3.3884\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 39.83198057357795, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7098946571350098}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.51466965675354}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.008143186569214}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.067349910736084}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8076348304748535}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.852140509285086, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1402106285095215}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 70, "power": 265.852}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.997976064682007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 28.840486283296645, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.871967315673828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7631046772003174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.617359161376953}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:16], Iteration [190/200000], D/loss_real: -1.6931, D/loss_fake: 1.0205, D/loss_cls: 3.2839, D/loss_gp: 0.0006, G/loss_fake: -0.7204, G/loss_rec: 0.5091, G/loss_cls: 3.3371\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 40.31118768896798, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.572483777999878}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3426673412323}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.014084577560425}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6669960021972656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.339291572570801}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.86012408635559, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 0.99, "temperature": 69, "power": 290.744}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 11.162610054016113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.6627864837646484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 27.926404770550437, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.650336265563965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.6977224349975586}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.569446325302124}, "pipe": "data"} +{"event": "line", "data": "Elapsed [0:01:19], Iteration [200/200000], D/loss_real: -4.7938, D/loss_fake: 3.8599, D/loss_cls: 3.4662, D/loss_gp: 0.0037, G/loss_fake: -3.9476, G/loss_rec: 0.5115, G/loss_cls: 3.3025\n", "pipe": "stdout"} +{"event": "data", "data": {"task": "train", "rate": 41.86212211399227, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [37442.375, 81920.0], "load": 1.0, "temperature": 70, "power": 313.068}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/stargan/voirconf-stargan.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/stargan/stargan/main.py", "--batch_size", "16", "--c_dim", "5", "--image_size", "512"], "time": 1711996733.8059762, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/super-slomo.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/super-slomo.D0.data new file mode 100644 index 000000000..8f4b1922c --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/super-slomo.D0.data @@ -0,0 +1,398 @@ +{"event": "config", "data": {"argv": {"--train_batch_size": 32}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "super-slomo", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "super-slomo", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["super-slomo", "D0"], "tags": ["convnet", "unet", "video-interpolation", "vision"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 66, "power": 84.705, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996736.488015, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1711996736.5049727}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torchvision/models/_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=VGG16_Weights.IMAGENET1K_V1`. You can also use `weights=VGG16_Weights.DEFAULT` to get the most up-to-date weights.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(msg)\n", "pipe": "stderr"} +{"event": "line", "data": "Epoch: 0\n", "pipe": "stdout"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:136: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`. Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\"Detected call of `lr_scheduler.step()` before `optimizer.step()`. \"\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "progress": [0, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1472.375, 81920.0], "load": 0, "temperature": 62, "power": 77.198}}}, "pipe": "data"} +{"event": "line", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch/lib/python3.10/site-packages/torch/nn/functional.py:4296: UserWarning: Default grid_sample and affine_grid behavior has changed to align_corners=False since 1.3.0. Please specify align_corners=True if the old behavior is desired. See the documentation of grid_sample for details.\n", "pipe": "stderr"} +{"event": "line", "data": " warnings.warn(\n", "pipe": "stderr"} +{"event": "data", "data": {"task": "train", "loss": 328.44195556640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.4143371582031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.83, "temperature": 66, "power": 272.181}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3927307128906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3746337890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3583679199219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3468017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.34136962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 66, "power": 236.026}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3391418457031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3383483886719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.33819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.47766369952002, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.33819580078125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.9, "temperature": 67, "power": 274.923}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.33807373046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.450683491599534, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3379211425781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3377380371094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.628192788679655, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3375549316406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3374328613281}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 67, "power": 271.278}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.107677793635595, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3371887207031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.46825373567794, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.33697509765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.336669921875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.31162809992703, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3362731933594}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.95, "temperature": 68, "power": 284.68}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3358459472656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.66483892817741, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.33538818359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3349304199219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.52715194820562, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3346252441406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.23815204929625, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3341064453125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 70, "power": 274.119}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3336181640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.5154218607697, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3331604003906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3326110839844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.07705603641837, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3323059082031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.99, "temperature": 70, "power": 360.158}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.33197021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.197975348321464, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3313903808594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.90143063663614, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3306579589844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3304443359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.55429560184462, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 349.644}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32965087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3292236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.1277196669992, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3288269042969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3283386230469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.93195892340062, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3277587890625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 70, "power": 315.696}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32708740234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.84658812913994, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3265380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.55214107030514, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3255615234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3249816894531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.177441398462584, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 271.484}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32452392578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32342529296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.97991572089352, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3218078613281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.12769243711983, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3172302246094}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 72, "power": 319.686}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3135681152344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.549940361304564, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32110595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 32.99337546944122, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31353759765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3174133300781}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.021979343071905, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 71, "power": 245.135}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3187561035156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3249816894531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.246556826421376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32012939453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.595414628169124, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.32049560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3199157714844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.81, "temperature": 72, "power": 248.9}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.940137255880416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3191833496094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3187561035156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.98467643516222, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31689453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3147888183594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.76648359837244, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.99, "temperature": 73, "power": 327.146}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.31243896484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.52186114101584, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30938720703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3166198730469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.268467488663184, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.308349609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30780029296875}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.79, "temperature": 74, "power": 170.415}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.42814702313145, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30792236328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30743408203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.72209024493063, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30645751953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.504094870227505, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3050537109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.8, "temperature": 73, "power": 287.376}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3038635253906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.0399529786477, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.3028869628906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.30194091796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.21592378808098, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.29998779296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.79404329298469, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2977600097656}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.8, "temperature": 72, "power": 86.484}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2954406738281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.13834832209753, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.29339599609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2906188964844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.58746075411424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2864074707031}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 74, "power": 309.464}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28533935546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.23622249719044, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2856750488281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.11918789766923, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2820739746094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2826232910156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.11989574822996, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2816467285156}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.91, "temperature": 75, "power": 344.041}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2808837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.17640596896027, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.287841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.673359933048474, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28338623046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2843322753906}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.92762360110946, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 74, "power": 265.33}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2837219238281}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2850646972656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.78858986802904, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.78759721973771, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2843017578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.28265380859375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.84, "temperature": 75, "power": 290.721}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.180618981725125, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2816467285156}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2811584472656}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.31265084943483, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27947998046875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27850341796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.15154051582907, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 75, "power": 120.316}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27545166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 36.44622861185961, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.27410888671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.271240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.796821078756075, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2685241699219}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2641296386719}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 75, "power": 267.84}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 41.80381980117287, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2713317871094}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.26416015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.703005979108774, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2639465332031}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.222555124475974, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.26129150390625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 75, "power": 315.653}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2605285644531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 38.08046138020445, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2527160644531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2520446777344}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.80542095200426, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2474060058594}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2507019042969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 73, "power": 263.997}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.91743580123598, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2396240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 39.143769288444, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2396240234375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2369689941406}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 37.99451535872717, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2301940917969}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 0.99, "temperature": 75, "power": 257.271}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 10000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 328.2263488769531}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.030321247287326, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [33816.375, 81920.0], "load": 1.0, "temperature": 76, "power": 278.611}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/super-slomo/voirconf-super-slomo.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/super-slomo/slomo/train.py", "--train_batch_size", "32"], "time": 1711996821.6987398, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/t5.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/t5.D0.data new file mode 100644 index 000000000..876e65f1f --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/t5.D0.data @@ -0,0 +1,594 @@ +{"event": "config", "data": {"argv": {"--batch-size": 16, "--model": "T5", "--num-workers": 8, "--precision": "tf32-fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "hf", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "t5", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["t5", "D0"], "tags": ["huggingface", "language-modeling", "nlp", "transformer"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 2.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 71, "power": 90.017, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996824.400491, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "-m", "bench", "--batch-size", "16", "--model", "T5", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711996824.4157407}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.970378875732422}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.800580024719238}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.727293014526367}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.682456970214844}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 72, "power": 309.765}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.654926300048828}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.628446578979492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.61573600769043}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.587346076965332}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.556131362915039}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.513788223266602}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.457639694213867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.321690559387207}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.367353439331055}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 73, "power": 293.037}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.313187599182129}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.28883171081543}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.212865829467773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.028278350830078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.84654426574707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 10.015433311462402}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.654990196228027}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.75164081920551, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.576047897338867}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.431913375854492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.35800552368164}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.99, "temperature": 73, "power": 287.995}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.429001679630915, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.306499481201172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 9.0736665725708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.939047813415527}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.910099029541016}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.36286896924885, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.83672046661377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.653719902038574}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.481781959533691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.79376359836275, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.48547649383545}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.421192169189453}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 302.57}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.146132469177246}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.49770140563876, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 8.017644882202148}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.908407211303711}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.934696197509766}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.747555732727051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.45187034398659, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.655747413635254}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.503643035888672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.347489356994629}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.96678666863716, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.31602668762207}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 74, "power": 288.926}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.231689929962158}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 7.05312442779541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.49962089612466, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.9287261962890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.817322731018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.710197448730469}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.60150671005249}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.228152441527875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.518242359161377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.411893844604492}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.325069427490234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.2818689338804, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 74, "power": 286.218}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.233058929443359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.172609329223633}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.097485542297363}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.66938379274259, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 6.041872501373291}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.932075023651123}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.845073699951172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.775830268859863}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.16158131464933, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.705239295959473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.633525848388672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.575502395629883}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 74, "power": 246.476}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.02021519065763, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.5167365074157715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.423802852630615}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.354887962341309}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.47855950351165, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.272907257080078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.22577428817749}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.148532390594482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.052487373352051}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.85072561803738, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 5.000744819641113}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.897304534912109}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 75, "power": 311.703}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.836282253265381}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.89233724680381, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.77700138092041}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.716606140136719}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.660195827484131}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.557338292580866, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.5612311363220215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.502846717834473}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.482702732086182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.492550373077393}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.141071842824026, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.360192775726318}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 282.229}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2910284996032715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.2590203285217285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.95276891126303, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.185894012451172}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.096889972686768}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 4.042828559875488}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.21368399632839, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.9600980281829834}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.8612828254699707}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.813199996948242}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.774353265762329}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.859770837606376, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.680110216140747}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 294.544}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.641136646270752}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.5967588424682617}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.31511288824878, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.522066354751587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.439159870147705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3564889430999756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.90571322078739, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.3717949390411377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.323021650314331}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.2349772453308105}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.06525220703401, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.192401647567749}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 75, "power": 299.592}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.1241087913513184}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 3.0823092460632324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.988307476043701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.62601945797189, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.9341442584991455}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.8254554271698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.76162052154541}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.572384521663835, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.763589859008789}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.7407455444335938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.672053337097168}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 313.43}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.27825363137593, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.585357189178467}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.5391626358032227}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.4440035820007324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.3648884296417236}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.11307447716599, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2605745792388916}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.2033910751342773}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.1607017517089844}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.215326025832375, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.115994691848755}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.066122055053711}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 286.99}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.993964672088623}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.65843603986932, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 2.0019028186798096}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.9345535039901733}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8910869359970093}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 49.2793691511414, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.861147165298462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.8138902187347412}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.7461824417114258}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.6692562103271484}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.45939462883387, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.625261664390564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5834534168243408}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 304.891}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.5272022485733032}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.748746152485325, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.459859013557434}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4157166481018066}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3812469244003296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.07073154768558, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.3119561672210693}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2564345598220825}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.2277307510375977}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1866565942764282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.689430555701435, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.1411606073379517}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 294.388}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.083279013633728}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.0341026782989502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.40478062550646, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9987545013427734}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9641054272651672}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.9365309476852417}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.93481874733697, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8940635919570923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8575931787490845}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.8317781686782837}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7994922995567322}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.86653537280171, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 313.847}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7648830413818359}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7411973476409912}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7056658267974854}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.38296766613191, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6834872364997864}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6573166847229004}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6341385245323181}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.99400814941305, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6199974417686462}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5888203978538513}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5731779932975769}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 310.567}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5541250109672546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.49838550791851, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.536582350730896}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.5135989785194397}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4957291781902313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.350513309563595, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47861233353614807}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.47117674350738525}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4504289925098419}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.213560697789035, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.43357139825820923}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4255506992340088}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.41500476002693176}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 292.487}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.4001453220844269}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.83641663794295, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3856187164783478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.37681058049201965}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3642743229866028}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.455996520391444, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3536361753940582}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.34521564841270447}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3393672704696655}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.19585372940662, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3293442130088806}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.32399898767471313}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.99, "temperature": 76, "power": 295.921}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3113195300102234}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.3049860894680023}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 45.925075809186396, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2919404208660126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.28530269861221313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2786383628845215}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 44.47237037528416, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.27610689401626587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2684989273548126}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2630639374256134}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.409456853843984, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2509020268917084}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 305.387}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24854587018489838}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.24656544625759125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23619654774665833}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.25022358861173, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.23337531089782715}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22808784246444702}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.22517065703868866}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.16254138414351, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21650603413581848}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21727493405342102}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.21555562317371368}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.257377020540225, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 293.408}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20680122077465057}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.20380568504333496}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.2012479454278946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1972467303276062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.84302602690219, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1940830945968628}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1896078884601593}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18643631041049957}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.106227768215604, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.18215446174144745}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17853213846683502}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.17645688354969025}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 76, "power": 319.475}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.91136622045626, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1725698709487915}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16997073590755463}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16654980182647705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.290797781591046, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1624709963798523}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16435472667217255}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.16150528192520142}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15788640081882477}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 46.88499118475356, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15364141762256622}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.15300226211547852}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 77, "power": 312.32}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1467810720205307}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 43.394929907980334, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14508351683616638}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14274908602237701}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14380408823490143}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 47.19525705915388, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.14112578332424164}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.1418134719133377}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13803264498710632}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13522985577583313}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 48.76431522380425, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13362157344818115}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 0.98, "temperature": 77, "power": 299.946}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.13214538991451263}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.12823574244976044}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 42.62579659821314, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [35460.375, 81920.0], "load": 1.0, "temperature": 76, "power": 291.362}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-t5.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "-m", "bench", "--batch-size", "16", "--model", "T5", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711996899.7411203, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/tf32.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/tf32.D0.data new file mode 100644 index 000000000..d5d193ad4 --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/tf32.D0.data @@ -0,0 +1,124 @@ +{"event": "config", "data": {"argv": {"--dtype": "fp32", "--m": 8192, "--n": 8192, "--number": 10, "--repeat": 90, "--tf32": true}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/flops", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "flops", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "tf32", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["tf32", "D0"], "tags": ["diagnostic", "flops"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 0.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 73, "power": 92.392, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996902.427254, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "fp32", "--m", "8192", "--n", "8192", "--number", "10", "--repeat", "90", "--tf32"], "time": 1711996902.4375892}, "pipe": null} +{"event": "data", "data": {"task": "train", "rate": 116.91996872526215, "units": "Tflops", "t": 1711996904.2839668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [882.5, 81920.0], "load": 0, "temperature": 69, "power": 55.734}}, "t": 1711996903.721732}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 0, "temperature": 70, "power": 377.35}}, "t": 1711996904.233873}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.34632127408685, "units": "Tflops", "t": 1711996904.4484208}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.01272273074997, "units": "Tflops", "t": 1711996904.6125836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.94771902835947, "units": "Tflops", "t": 1711996904.776809}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 298.407}}, "t": 1711996904.7463415}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9975365795, "units": "Tflops", "t": 1711996904.9410536}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.52942438102184, "units": "Tflops", "t": 1711996905.1070569}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.2778657458181, "units": "Tflops", "t": 1711996905.2708797}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 305.602}}, "t": 1711996905.259678}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9061030853169, "units": "Tflops", "t": 1711996905.4352062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9008543127282, "units": "Tflops", "t": 1711996905.599487}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.95180422962704, "units": "Tflops", "t": 1711996905.7637026}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9663962685375, "units": "Tflops", "t": 1711996905.9278994}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 75, "power": 300.345}}, "t": 1711996905.7705698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.89016364127616, "units": "Tflops", "t": 1711996906.0922756}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.8810293291816, "units": "Tflops", "t": 1711996906.256587}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.92321305251843, "units": "Tflops", "t": 1711996906.420837}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.186}}, "t": 1711996906.2779436}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.89249600581212, "units": "Tflops", "t": 1711996906.5851986}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.88336137548646, "units": "Tflops", "t": 1711996906.7494993}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.91874070722702, "units": "Tflops", "t": 1711996906.9137707}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 296.662}}, "t": 1711996906.7892604}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.94480118000823, "units": "Tflops", "t": 1711996907.0780482}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9397438773255, "units": "Tflops", "t": 1711996907.2423012}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.97593145111415, "units": "Tflops", "t": 1711996907.406487}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 298.795}}, "t": 1711996907.29642}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.97943451214562, "units": "Tflops", "t": 1711996907.5707333}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9564733362106, "units": "Tflops", "t": 1711996907.734946}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.96328303347534, "units": "Tflops", "t": 1711996907.899151}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 298.697}}, "t": 1711996907.8061032}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.82955067201948, "units": "Tflops", "t": 1711996908.0635738}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.71630873973913, "units": "Tflops", "t": 1711996908.229327}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.01759081537435, "units": "Tflops", "t": 1711996908.3934736}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.39}}, "t": 1711996908.3134599}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.87947467677859, "units": "Tflops", "t": 1711996908.5578282}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.87811438554377, "units": "Tflops", "t": 1711996908.7221339}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.91543525268787, "units": "Tflops", "t": 1711996908.8864052}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 300.064}}, "t": 1711996908.8227253}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.87733708867765, "units": "Tflops", "t": 1711996909.0507643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.88705394829344, "units": "Tflops", "t": 1711996909.2150607}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.8625701629678, "units": "Tflops", "t": 1711996909.3793936}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 298.213}}, "t": 1711996909.3298988}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9152408192671, "units": "Tflops", "t": 1711996909.5437007}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.92671335720638, "units": "Tflops", "t": 1711996909.7079542}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.30980360404374, "units": "Tflops", "t": 1711996909.8729692}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.178}}, "t": 1711996909.8385897}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.57874697464635, "units": "Tflops", "t": 1711996910.037691}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.89249600581212, "units": "Tflops", "t": 1711996910.2019897}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.89443970499954, "units": "Tflops", "t": 1711996910.3662846}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.851}}, "t": 1711996910.346705}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9364373858947, "units": "Tflops", "t": 1711996910.5305698}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.43149505608406, "units": "Tflops", "t": 1711996910.695426}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.7864307453013, "units": "Tflops", "t": 1711996910.8610818}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 293.451}}, "t": 1711996910.855078}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.91446309123, "units": "Tflops", "t": 1711996911.0254097}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.92593549590782, "units": "Tflops", "t": 1711996911.1896708}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.91154669154366, "units": "Tflops", "t": 1711996911.3539546}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9356594116359, "units": "Tflops", "t": 1711996911.5181894}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 299.759}}, "t": 1711996911.3622386}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.84151027999977, "units": "Tflops", "t": 1711996911.6838589}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.67564518147444, "units": "Tflops", "t": 1711996911.8496542}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.93176967590452, "units": "Tflops", "t": 1711996912.013903}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 76, "power": 295.183}}, "t": 1711996911.8720958}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.9630884610883, "units": "Tflops", "t": 1711996912.178174}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.91115784784972, "units": "Tflops", "t": 1711996912.3424404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.95005339857175, "units": "Tflops", "t": 1711996912.506668}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 301.034}}, "t": 1711996912.3794146}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.51281061653674, "units": "Tflops", "t": 1711996912.6714725}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.74419581468626, "units": "Tflops", "t": 1711996912.837182}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.00126949378173, "units": "Tflops", "t": 1711996913.0025694}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 295.588}}, "t": 1711996912.8865564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.87150764987223, "units": "Tflops", "t": 1711996913.1669304}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.95180422962704, "units": "Tflops", "t": 1711996913.3311493}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.352592078033, "units": "Tflops", "t": 1711996913.4961104}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 307.64}}, "t": 1711996913.393953}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.77305025760035, "units": "Tflops", "t": 1711996913.6618567}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.3699465826653, "units": "Tflops", "t": 1711996913.8268013}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.7143990948615, "units": "Tflops", "t": 1711996913.992548}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 299.957}}, "t": 1711996913.9052892}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.65791788484373, "units": "Tflops", "t": 1711996914.157189}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.8952172004758, "units": "Tflops", "t": 1711996914.321474}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.31423536904967, "units": "Tflops", "t": 1711996914.4864848}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.831}}, "t": 1711996914.416831}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.05384022721637, "units": "Tflops", "t": 1711996914.6518564}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.36724669638775, "units": "Tflops", "t": 1711996914.8167922}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.66228701039012, "units": "Tflops", "t": 1711996914.982613}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 297.049}}, "t": 1711996914.9270992}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.69860445994024, "units": "Tflops", "t": 1711996915.1471865}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.78703779994657, "units": "Tflops", "t": 1711996915.311611}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.7789755694636, "units": "Tflops", "t": 1711996915.4772754}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 296.369}}, "t": 1711996915.4370909}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.38209742378564, "units": "Tflops", "t": 1711996915.6422398}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 134.06297837832602, "units": "Tflops", "t": 1711996915.8063285}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.0615182633709, "units": "Tflops", "t": 1711996915.971643}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.243}}, "t": 1711996915.9455404}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.98611995309378, "units": "Tflops", "t": 1711996916.1370978}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.6077718670839, "units": "Tflops", "t": 1711996916.3017485}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.4635458928627, "units": "Tflops", "t": 1711996916.4665751}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.047}}, "t": 1711996916.4536643}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.64282706153665, "units": "Tflops", "t": 1711996916.632478}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.7319471597393, "units": "Tflops", "t": 1711996916.7969742}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.8437276340636, "units": "Tflops", "t": 1711996916.961324}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.73999290280244, "units": "Tflops", "t": 1711996917.127047}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 301.321}}, "t": 1711996916.9619296}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.80023525224564, "units": "Tflops", "t": 1711996917.2914982}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.83984323615513, "units": "Tflops", "t": 1711996917.4558597}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.73005980533483, "units": "Tflops", "t": 1711996917.6215947}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.631}}, "t": 1711996917.4691346}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.74343162909292, "units": "Tflops", "t": 1711996917.7873673}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.69705404133202, "units": "Tflops", "t": 1711996917.951894}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.7875777697363, "units": "Tflops", "t": 1711996918.1175473}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 302.095}}, "t": 1711996917.9777055}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.87422799147083, "units": "Tflops", "t": 1711996918.2819352}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.94382859214022, "units": "Tflops", "t": 1711996918.4461696}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.0079824190062, "units": "Tflops", "t": 1711996918.6115518}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [2182.375, 81920.0], "load": 1.0, "temperature": 77, "power": 300.051}}, "t": 1711996918.4886498}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 132.77152123070732, "units": "Tflops", "t": 1711996918.7772722}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 133.24028274829357, "units": "Tflops", "t": 1711996918.9423656}, "pipe": "data"} +{"event": "end", "data": {"command": ["/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/activator", "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch", "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/flops/main.py", "--dtype", "fp32", "--m", "8192", "--n", "8192", "--number", "10", "--repeat", "90", "--tf32"], "time": 1711996919.5094998, "return_code": 0}, "pipe": null} diff --git a/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/whisper.D0.data b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/whisper.D0.data new file mode 100644 index 000000000..4a5001add --- /dev/null +++ b/paice-v1-9-g03d3434/NVIDIA_A100_80GB_PCIe/jofevozu.2024-04-01_14:01:06.047754/whisper.D0.data @@ -0,0 +1,684 @@ +{"event": "config", "data": {"argv": {"--batch-size": 64, "--model": "Whisper", "--num-workers": 8, "--precision": "tf32-fp16"}, "capabilities": {"nodes": 1}, "config_base": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config", "config_file": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/config/standard.yaml", "definition": "/Users/satyaortiz-gagne/travail/mila/CODE/milabench/benchmarks/huggingface", "dirs": {"base": "/Users/satyaortiz-gagne/travail/mila/milabench", "cache": "/Users/satyaortiz-gagne/travail/mila/milabench/cache", "data": "/Users/satyaortiz-gagne/travail/mila/milabench/data", "extra": "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf", "runs": "/Users/satyaortiz-gagne/travail/mila/milabench/runs", "venv": "/Users/satyaortiz-gagne/travail/mila/milabench/venv/torch"}, "enabled": true, "group": "hf", "hash": "6b20148b6ef891f7800e687f0e01e031", "install_group": "torch", "max_duration": 600, "name": "whisper", "plan": {"method": "per_gpu"}, "run_name": "jofevozu.2024-04-01_14:01:06.047754", "system": {"cloud_profiles": {"azure__a100": {"location": "eastus2", "size": "Standard_NC24ads_A100_v4", "username": "ubuntu"}}, "nodes": [{"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}], "self": {"name": "local", "ip": "127.0.0.1", "port": 8123, "user": "ubuntu", "main": true, "hostname": "127.0.0.1", "aliaslist": [], "ipaddrlist": ["10.0.1.4", "fe80::6245:bdff:fe79:6a4b%eth0", "127.0.0.1", "00:00:00:00:00:00", "::1", "60:45:bd:79:6a:4b"], "local": true}, "sshkey": null, "arch": "cuda"}, "tag": ["whisper", "D0"], "tags": ["audio", "huggingface"], "validation": {"usage": {"gpu_load_threshold": 0.5, "gpu_mem_threshold": 0.5}}, "voir": {"options": {"interval": "1s", "stop": 60}}, "weight": 1.0, "device": "0", "devices": ["0"], "env": {"CUDA_VISIBLE_DEVICES": "0"}}, "pipe": null} +{"event": "meta", "data": {"cpu": {"count": 24, "brand": "AMD EPYC 7V13 64-Core Processor"}, "os": {"sysname": "Linux", "nodename": "vm", "release": "6.5.0-1017-azure", "version": "#17~22.04.1-Ubuntu SMP Sat Mar 9 04:50:38 UTC 2024", "machine": "x86_64"}, "accelerators": {"arch": "cuda", "gpus": {"GPU-5695a3a6-ca9e-87a2-4cc7-d8ddc448256b": {"device": "0", "product": "NVIDIA A100 80GB PCIe", "memory": {"used": 882.4375, "total": 81920.0}, "utilization": {"compute": 0, "memory": 0.010771942138671876}, "temperature": 72, "power": 91.308, "selection_variable": "CUDA_VISIBLE_DEVICES"}}}, "date": 1711996922.249541, "milabench": {"tag": "paice-v1-9-g03d3434", "commit": "03d343430e749135f99d19e9c89ed0d4414b83f6", "date": "2024-03-27 13:40:33 -0400"}, "pytorch": {"torch": "2.1.0+cu118", "compiler": "GCC 9.3", "cpp": "C++ Version: 201703", "intel": "Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications", "mkl": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "openmp": "OpenMP 201511 (a.k.a. OpenMP 4.5)", "lapack": "LAPACK is enabled (usually provided by MKL)", "nnpack": "NNPACK is enabled", "cpu": "CPU capability usage: AVX2", "build_settings": {"BLAS_INFO": "mkl", "BUILD_TYPE": "Release", "CUDA_VERSION": "11.8", "CUDNN_VERSION": "8.7.0", "CXX_COMPILER": "/opt/rh/devtoolset-9/root/usr/bin/c++", "CXX_FLAGS": "-D_GLIBCXX_USE_CXX11_ABI=0 -fabi-version=11 -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -DNDEBUG -DUSE_KINETO -DLIBKINETO_NOROCTRACER -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wall -Wextra -Werror=return-type -Werror=non-virtual-dtor -Werror=bool-operation -Wnarrowing -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=old-style-cast -Wno-invalid-partial-specialization -Wno-unused-private-field -Wno-aligned-allocation-unavailable -Wno-missing-braces -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow", "LAPACK_INFO": "mkl", "PERF_WITH_AVX": "1", "PERF_WITH_AVX2": "1", "PERF_WITH_AVX512": "1", "TORCH_DISABLE_GPU_ASSERTS": "ON", "TORCH_VERSION": "2.1.0", "USE_CUDA": "ON", "USE_CUDNN": "ON", "USE_EXCEPTION_PTR": "1", "USE_GFLAGS": "OFF", "USE_GLOG": "OFF", "USE_MKL": "ON", "USE_MKLDNN": "ON", "USE_MPI": "OFF", "USE_NCCL": "1", "USE_NNPACK": "ON", "USE_OPENMP": "ON", "USE_ROCM": "OFF"}}}, "pipe": null} +{"event": "start", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "-m", "bench", "--batch-size", "64", "--model", "Whisper", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711996922.2656007}, "pipe": null} +{"event": "phase", "data": {"name": "init"}, "pipe": "data"} +{"event": "phase", "data": {"name": "parse_args"}, "pipe": "data"} +{"event": "phase", "data": {"name": "load_script"}, "pipe": "data"} +{"event": "phase", "data": {"name": "run_script"}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [0, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 68, "power": 83.769}}}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [1360.375, 81920.0], "load": 0, "temperature": 67, "power": 82.134}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6992721557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36710.375, 81920.0], "load": 0.35, "temperature": 70, "power": 308.708}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [1, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 1.4653310775756836}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [2, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.970947265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [3, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6924057006835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [4, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7043609619140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [5, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [6, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6987991333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [7, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6994171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [8, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [9, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6975479125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [10, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6967544555664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [11, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 72, "power": 302.889}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [12, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6952743530273438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [13, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6956329345703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [14, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [15, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6933135986328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [16, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6947784423828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [17, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918106079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [18, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918716430664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [19, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938247680664062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [20, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923065185546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [21, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69122314453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [22, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926803588867188}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 72, "power": 314.141}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 233.24355918320123, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [1, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [23, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926116943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [24, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [25, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917190551757812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [26, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69244384765625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.08141246339954, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [2, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [27, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915664672851562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [28, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [29, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919631958007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [30, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917724609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.806153266901, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [3, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [31, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [32, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [33, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691802978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 73, "power": 305.049}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [34, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913375854492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.53084778018362, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [4, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [35, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [36, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691558837890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [37, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [38, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.7973301816188, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [5, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [39, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691375732421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [40, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69146728515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [41, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [42, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.74186689131562, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [6, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [43, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913833618164062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [44, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912765502929688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 74, "power": 282.923}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [45, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [46, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.64266215820487, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [7, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [47, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [48, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [49, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [50, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.12324963215315, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [8, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [51, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [52, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [53, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [54, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.84011555799998, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [9, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [55, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 74, "power": 289.045}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [56, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [57, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912155151367188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [58, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.92864576817894, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [10, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [59, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [60, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [61, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [62, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.8675327198999, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [11, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [63, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912307739257812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [64, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [65, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [66, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.7881537563424, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [12, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [67, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 74, "power": 281.666}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [68, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [69, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [70, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.5249291076629, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [13, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [71, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [72, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [73, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [74, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.36853276531681, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [14, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [75, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [76, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [77, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [78, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 74, "power": 289.27}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.6237014067847, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [15, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [79, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [80, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [81, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [82, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912002563476562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.91110883647258, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [16, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [83, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [84, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [85, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [86, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.9352654685548, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [17, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [87, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [88, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [89, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 75, "power": 289.091}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [90, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.63307008870598, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [18, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [91, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [92, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [93, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [94, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.46126609083294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [19, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [95, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [96, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [97, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [98, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.66160280392864, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [20, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [99, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 75, "power": 302.393}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [100, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [101, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [102, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.79617752290133, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [21, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [103, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [104, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [105, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [106, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.87273027752707, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [22, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [107, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [108, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [109, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [110, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.45203218829505, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [23, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [111, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 76, "power": 292.245}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [112, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [113, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [114, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.67671383764528, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [24, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [115, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [116, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [117, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [118, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691162109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.28558355042014, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [25, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [119, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [120, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [121, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911468505859375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [122, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 75, "power": 293.136}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.79346728390377, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [26, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [123, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [124, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [125, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [126, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.41554455802287, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [27, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [127, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [128, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691131591796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [129, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [130, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.8967693300167, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [28, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [131, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911392211914062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [132, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 76, "power": 280.61}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [133, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [134, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.3252505967853, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [29, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [135, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [136, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [137, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [138, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911697387695312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.36585885616674, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [30, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [139, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911773681640625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [140, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [141, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [142, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.30996276237744, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [31, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [143, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910781860351562}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 76, "power": 275.594}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [144, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911087036132812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [145, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [146, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.79421105302617, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [32, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [147, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [148, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [149, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [150, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910552978515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.8632681293117, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [33, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [151, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [152, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [153, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [154, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909866333007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.85373235070585, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [34, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [155, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 314.75}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [156, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908340454101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [157, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [158, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919326782226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.4179224383135, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [35, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [159, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914596557617188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [160, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910934448242188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [161, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914291381835938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [162, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.1825847735382, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [36, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [163, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69134521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [164, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691009521484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [165, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911544799804688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 299.676}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [166, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69110107421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.46880775436117, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [37, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [167, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910018920898438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [168, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912841796875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [169, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910476684570312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [170, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.33144513338715, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [38, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [171, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [172, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [173, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910247802734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [174, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911163330078125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.69756747924419, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [39, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [175, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [176, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910324096679688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 76, "power": 317.348}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [177, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910858154296875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [178, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909027099609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.22497950624756, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [40, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [179, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909942626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [180, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [181, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [182, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908721923828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.66667952756455, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [41, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [183, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906585693359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [184, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905517578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [185, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6905288696289062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [186, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6909103393554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.58767213086207, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [42, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [187, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955337524414062}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 307.923}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [188, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7002067565917969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [189, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.697906494140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [190, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913986206054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.84027854747058, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [43, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [191, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6951828002929688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [192, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6938323974609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [193, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [194, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6948165893554688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.4740604022761, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [44, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [195, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912918090820312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [196, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923904418945312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [197, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693206787109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [198, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6908798217773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.9698845539755, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [45, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [199, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6921310424804688}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 271.78}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [200, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6922607421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [201, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [202, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915359497070312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.8338943770837, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [46, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [203, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6916046142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [204, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6903610229492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [205, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6906814575195312}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [206, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69073486328125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.56117968145918, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [47, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [207, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897354125976562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [208, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6897735595703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [209, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6895599365234375}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.91, "temperature": 76, "power": 295.577}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [210, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6880340576171875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.59151539801294, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [48, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [211, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6874923706054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [212, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6850967407226562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [213, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6821975708007812}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [214, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.67584228515625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.32367158203277, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [49, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [215, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6980819702148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [216, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.832855224609375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [217, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.7104606628417969}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [218, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.708587646484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.54001925181723, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [50, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [219, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6919479370117188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [220, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6960067749023438}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 77, "power": 289.731}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [221, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6996231079101562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [222, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6961898803710938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.8765723997128, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [51, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [223, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914520263671875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [224, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69287109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [225, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6962814331054688}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [226, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6955718994140625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.61528147315875, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [52, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [227, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6918563842773438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [228, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691741943359375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [229, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.693878173828125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [230, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6942672729492188}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.2140449451061, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [53, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [231, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6923370361328125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 77, "power": 290.617}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [232, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912078857421875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [233, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69189453125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [234, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6931381225585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 216.1069971270415, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [54, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [235, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6926040649414062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [236, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [237, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691192626953125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [238, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917572021484375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.6388863233194, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [55, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [239, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69232177734375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [240, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6920166015625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [241, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [242, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691253662109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 213.95071397087423, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [56, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [243, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69140625}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 77, "power": 285.83}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [244, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.69171142578125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [245, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6917266845703125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [246, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913909912109375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.078537041, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [57, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [247, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [248, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [249, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914443969726562}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [250, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6915283203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.8999173771098, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [58, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [251, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6913299560546875}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [252, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6910629272460938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [253, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.92, "temperature": 77, "power": 302.4}}}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [254, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 215.553938936565, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [59, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [255, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6914215087890625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [256, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.691314697265625}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [257, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912460327148438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [258, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911239624023438}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 217.008839168434, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [60, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [259, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6911849975585938}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [260, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912384033203125}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [261, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912689208984375}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "progress": [262, 100000]}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "loss": 0.6912612915039062}, "pipe": "data"} +{"event": "data", "data": {"task": "train", "rate": 214.60972017302655, "units": "items/s"}, "pipe": "data"} +{"event": "data", "data": {"task": "early_stop", "progress": [61, 60]}, "pipe": "data"} +{"event": "data", "data": {"task": "main", "gpudata": {"0": {"memory": [36740.375, 81920.0], "load": 0.9, "temperature": 77, "power": 286.49}}}, "pipe": "data"} +{"event": "phase", "data": {"name": "finalize"}, "pipe": "data"} +{"event": "end", "data": {"command": ["voir", "--config", "/Users/satyaortiz-gagne/travail/mila/milabench/extra/hf/voirconf-whisper.D0-ce1dc503950f5ff93fdadb45d1d68afe.json", "-m", "bench", "--batch-size", "64", "--model", "Whisper", "--num-workers", "8", "--precision", "tf32-fp16"], "time": 1711997005.572096, "return_code": 0}, "pipe": null}