{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.229820232640113, "global_step": 750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.0002, "loss": 1.0948, "step": 20 }, { "epoch": 0.11, "eval_loss": 0.854992151260376, "eval_runtime": 114.0549, "eval_samples_per_second": 17.535, "eval_steps_per_second": 0.281, "step": 20 }, { "epoch": 0.23, "learning_rate": 0.0002, "loss": 0.9283, "step": 40 }, { "epoch": 0.23, "eval_loss": 0.8098269104957581, "eval_runtime": 114.0416, "eval_samples_per_second": 17.537, "eval_steps_per_second": 0.281, "step": 40 }, { "epoch": 0.34, "learning_rate": 0.0002, "loss": 0.8873, "step": 60 }, { "epoch": 0.34, "eval_loss": 0.7932102084159851, "eval_runtime": 114.0727, "eval_samples_per_second": 17.533, "eval_steps_per_second": 0.281, "step": 60 }, { "epoch": 0.45, "learning_rate": 0.0002, "loss": 0.8689, "step": 80 }, { "epoch": 0.45, "eval_loss": 0.7797037363052368, "eval_runtime": 114.0119, "eval_samples_per_second": 17.542, "eval_steps_per_second": 0.281, "step": 80 }, { "epoch": 0.56, "learning_rate": 0.0002, "loss": 0.8613, "step": 100 }, { "epoch": 0.56, "eval_loss": 0.7756124138832092, "eval_runtime": 114.0328, "eval_samples_per_second": 17.539, "eval_steps_per_second": 0.281, "step": 100 }, { "epoch": 0.68, "learning_rate": 0.0002, "loss": 0.8514, "step": 120 }, { "epoch": 0.68, "eval_loss": 0.7713360786437988, "eval_runtime": 114.035, "eval_samples_per_second": 17.538, "eval_steps_per_second": 0.281, "step": 120 }, { "epoch": 0.79, "learning_rate": 0.0002, "loss": 0.8304, "step": 140 }, { "epoch": 0.79, "eval_loss": 0.7613078355789185, "eval_runtime": 113.9737, "eval_samples_per_second": 17.548, "eval_steps_per_second": 0.281, "step": 140 }, { "epoch": 0.9, "learning_rate": 0.0002, "loss": 0.8137, "step": 160 }, { "epoch": 0.9, "eval_loss": 0.7543554902076721, "eval_runtime": 114.008, "eval_samples_per_second": 17.543, "eval_steps_per_second": 0.281, "step": 160 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 0.7984, "step": 180 }, { "epoch": 1.02, "eval_loss": 0.7566795945167542, "eval_runtime": 113.9491, "eval_samples_per_second": 17.552, "eval_steps_per_second": 0.281, "step": 180 }, { "epoch": 1.13, "learning_rate": 0.0002, "loss": 0.486, "step": 200 }, { "epoch": 1.13, "eval_loss": 0.7874847054481506, "eval_runtime": 114.0046, "eval_samples_per_second": 17.543, "eval_steps_per_second": 0.281, "step": 200 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 0.4904, "step": 220 }, { "epoch": 1.24, "eval_loss": 0.7797561287879944, "eval_runtime": 113.9329, "eval_samples_per_second": 17.554, "eval_steps_per_second": 0.281, "step": 220 }, { "epoch": 1.35, "learning_rate": 0.0002, "loss": 0.4875, "step": 240 }, { "epoch": 1.35, "eval_loss": 0.7851331830024719, "eval_runtime": 113.9196, "eval_samples_per_second": 17.556, "eval_steps_per_second": 0.281, "step": 240 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.4937, "step": 260 }, { "epoch": 1.47, "eval_loss": 0.7893801927566528, "eval_runtime": 113.9255, "eval_samples_per_second": 17.555, "eval_steps_per_second": 0.281, "step": 260 }, { "epoch": 1.58, "learning_rate": 0.0002, "loss": 0.4941, "step": 280 }, { "epoch": 1.58, "eval_loss": 0.7871307134628296, "eval_runtime": 113.9477, "eval_samples_per_second": 17.552, "eval_steps_per_second": 0.281, "step": 280 }, { "epoch": 1.69, "learning_rate": 0.0002, "loss": 0.5171, "step": 300 }, { "epoch": 1.69, "eval_loss": 0.7889020442962646, "eval_runtime": 113.9211, "eval_samples_per_second": 17.556, "eval_steps_per_second": 0.281, "step": 300 }, { "epoch": 1.8, "learning_rate": 0.0002, "loss": 0.4989, "step": 320 }, { "epoch": 1.8, "eval_loss": 0.7817031145095825, "eval_runtime": 113.9558, "eval_samples_per_second": 17.551, "eval_steps_per_second": 0.281, "step": 320 }, { "epoch": 1.92, "learning_rate": 0.0002, "loss": 0.4973, "step": 340 }, { "epoch": 1.92, "eval_loss": 0.7942374348640442, "eval_runtime": 113.914, "eval_samples_per_second": 17.557, "eval_steps_per_second": 0.281, "step": 340 }, { "epoch": 2.03, "learning_rate": 0.0002, "loss": 0.457, "step": 360 }, { "epoch": 2.03, "eval_loss": 0.8580000400543213, "eval_runtime": 113.9119, "eval_samples_per_second": 17.557, "eval_steps_per_second": 0.281, "step": 360 }, { "epoch": 2.14, "learning_rate": 0.0002, "loss": 0.2893, "step": 380 }, { "epoch": 2.14, "eval_loss": 0.8477640151977539, "eval_runtime": 113.8661, "eval_samples_per_second": 17.564, "eval_steps_per_second": 0.281, "step": 380 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 0.3032, "step": 400 }, { "epoch": 2.26, "eval_loss": 0.8502757549285889, "eval_runtime": 113.9303, "eval_samples_per_second": 17.555, "eval_steps_per_second": 0.281, "step": 400 }, { "epoch": 2.37, "learning_rate": 0.0002, "loss": 0.2967, "step": 420 }, { "epoch": 2.37, "eval_loss": 0.8499388694763184, "eval_runtime": 113.925, "eval_samples_per_second": 17.555, "eval_steps_per_second": 0.281, "step": 420 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 0.3125, "step": 440 }, { "epoch": 2.48, "eval_loss": 0.8446888327598572, "eval_runtime": 113.9432, "eval_samples_per_second": 17.553, "eval_steps_per_second": 0.281, "step": 440 }, { "epoch": 2.59, "learning_rate": 0.0002, "loss": 0.3051, "step": 460 }, { "epoch": 2.59, "eval_loss": 0.8607853055000305, "eval_runtime": 113.9087, "eval_samples_per_second": 17.558, "eval_steps_per_second": 0.281, "step": 460 }, { "epoch": 2.71, "learning_rate": 0.0002, "loss": 0.3021, "step": 480 }, { "epoch": 2.71, "eval_loss": 0.8590434789657593, "eval_runtime": 113.9419, "eval_samples_per_second": 17.553, "eval_steps_per_second": 0.281, "step": 480 }, { "epoch": 2.82, "learning_rate": 0.0002, "loss": 0.3218, "step": 500 }, { "epoch": 2.82, "eval_loss": 0.8519604206085205, "eval_runtime": 113.9329, "eval_samples_per_second": 17.554, "eval_steps_per_second": 0.281, "step": 500 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.3291, "step": 520 }, { "epoch": 2.93, "eval_loss": 0.8613017797470093, "eval_runtime": 113.9591, "eval_samples_per_second": 17.55, "eval_steps_per_second": 0.281, "step": 520 }, { "epoch": 3.05, "learning_rate": 0.0002, "loss": 0.2542, "step": 540 }, { "epoch": 3.05, "eval_loss": 0.9529324769973755, "eval_runtime": 113.9447, "eval_samples_per_second": 17.552, "eval_steps_per_second": 0.281, "step": 540 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 0.1857, "step": 560 }, { "epoch": 3.16, "eval_loss": 0.9232375621795654, "eval_runtime": 113.9261, "eval_samples_per_second": 17.555, "eval_steps_per_second": 0.281, "step": 560 }, { "epoch": 3.27, "learning_rate": 0.0002, "loss": 0.1733, "step": 580 }, { "epoch": 3.27, "eval_loss": 0.9417396783828735, "eval_runtime": 113.9558, "eval_samples_per_second": 17.551, "eval_steps_per_second": 0.281, "step": 580 }, { "epoch": 3.38, "learning_rate": 0.0002, "loss": 0.1786, "step": 600 }, { "epoch": 3.38, "eval_loss": 0.9412431716918945, "eval_runtime": 113.983, "eval_samples_per_second": 17.546, "eval_steps_per_second": 0.281, "step": 600 }, { "epoch": 3.5, "learning_rate": 0.0002, "loss": 0.1824, "step": 620 }, { "epoch": 3.5, "eval_loss": 0.930392324924469, "eval_runtime": 113.9599, "eval_samples_per_second": 17.55, "eval_steps_per_second": 0.281, "step": 620 }, { "epoch": 3.61, "learning_rate": 0.0002, "loss": 0.1929, "step": 640 }, { "epoch": 3.61, "eval_loss": 0.949115514755249, "eval_runtime": 113.9373, "eval_samples_per_second": 17.554, "eval_steps_per_second": 0.281, "step": 640 }, { "epoch": 3.72, "learning_rate": 0.0002, "loss": 0.1872, "step": 660 }, { "epoch": 3.72, "eval_loss": 0.9479944109916687, "eval_runtime": 113.9937, "eval_samples_per_second": 17.545, "eval_steps_per_second": 0.281, "step": 660 }, { "epoch": 3.84, "learning_rate": 0.0002, "loss": 0.1868, "step": 680 }, { "epoch": 3.84, "eval_loss": 0.9432507157325745, "eval_runtime": 113.9872, "eval_samples_per_second": 17.546, "eval_steps_per_second": 0.281, "step": 680 }, { "epoch": 3.95, "learning_rate": 0.0002, "loss": 0.1911, "step": 700 }, { "epoch": 3.95, "eval_loss": 0.9545673727989197, "eval_runtime": 113.9472, "eval_samples_per_second": 17.552, "eval_steps_per_second": 0.281, "step": 700 }, { "epoch": 4.06, "learning_rate": 0.0002, "loss": 0.1445, "step": 720 }, { "epoch": 4.06, "eval_loss": 1.005204200744629, "eval_runtime": 113.9943, "eval_samples_per_second": 17.545, "eval_steps_per_second": 0.281, "step": 720 }, { "epoch": 4.17, "learning_rate": 0.0002, "loss": 0.1033, "step": 740 }, { "epoch": 4.17, "eval_loss": 1.011751413345337, "eval_runtime": 114.0191, "eval_samples_per_second": 17.541, "eval_steps_per_second": 0.281, "step": 740 }, { "before_init_mem_cpu": 2210148352, "before_init_mem_gpu": 8491955200, "epoch": 4.23, "init_mem_cpu_alloc_delta": 0, "init_mem_cpu_peaked_delta": 0, "init_mem_gpu_alloc_delta": 0, "init_mem_gpu_peaked_delta": 0, "step": 750, "total_flos": 7.611498484214006e+17, "train_loss": 0.44945241530736285, "train_mem_cpu_alloc_delta": 4622671872, "train_mem_cpu_peaked_delta": 0, "train_mem_gpu_alloc_delta": 1833328128, "train_mem_gpu_peaked_delta": 32450574848, "train_runtime": 14193.5716, "train_samples_per_second": 6.764, "train_steps_per_second": 0.053 } ], "max_steps": 750, "num_train_epochs": 5, "total_flos": 7.611498484214006e+17, "trial_name": null, "trial_params": null }