| { | |
| "best_metric": 2.7371606826782227, | |
| "best_model_checkpoint": "/data1/attanasiog/babylm/roberta-tiny-10M/checkpoint-4150", | |
| "epoch": 89.57546563904945, | |
| "global_step": 4300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 8e-05, | |
| "loss": 10.4287, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00016, | |
| "loss": 9.0477, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00024, | |
| "loss": 7.8228, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00032, | |
| "loss": 7.3343, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0004, | |
| "loss": 7.8031, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_accuracy": 0.06061240850112075, | |
| "eval_loss": 7.355990886688232, | |
| "eval_runtime": 145.9523, | |
| "eval_samples_per_second": 164.814, | |
| "eval_steps_per_second": 5.152, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.000399995625676045, | |
| "loss": 7.2898, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 0.0003999825028955268, | |
| "loss": 7.1829, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.0003999606322324786, | |
| "loss": 7.0831, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 0.0003999300146435939, | |
| "loss": 6.8807, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.00039989065146818525, | |
| "loss": 7.1948, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_accuracy": 0.11823707432860285, | |
| "eval_loss": 6.737408638000488, | |
| "eval_runtime": 145.8622, | |
| "eval_samples_per_second": 164.916, | |
| "eval_steps_per_second": 5.156, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 0.0003998425444281255, | |
| "loss": 6.659, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.00039978569562777234, | |
| "loss": 6.5924, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 0.0003997201075538765, | |
| "loss": 6.5237, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.0003996457830754729, | |
| "loss": 6.4927, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 0.00039956272544375493, | |
| "loss": 6.8927, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "eval_accuracy": 0.1414597356195163, | |
| "eval_loss": 6.502169609069824, | |
| "eval_runtime": 145.9635, | |
| "eval_samples_per_second": 164.801, | |
| "eval_steps_per_second": 5.152, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.00039947093829193245, | |
| "loss": 6.4155, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 0.00039937042563507283, | |
| "loss": 6.4041, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 0.00039926119186992537, | |
| "loss": 6.3875, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.95, | |
| "learning_rate": 0.0003991432417747288, | |
| "loss": 6.3543, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 0.0003990165805090023, | |
| "loss": 6.7339, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "eval_accuracy": 0.1482938589304516, | |
| "eval_loss": 6.400519847869873, | |
| "eval_runtime": 145.8639, | |
| "eval_samples_per_second": 164.914, | |
| "eval_steps_per_second": 5.155, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 0.00039888121361332003, | |
| "loss": 6.3085, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 0.0003987371470090686, | |
| "loss": 6.3213, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 0.00039858438699818784, | |
| "loss": 6.2931, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 0.0003984229402628956, | |
| "loss": 6.2716, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 0.00039825281386539503, | |
| "loss": 6.6609, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "eval_accuracy": 0.1509599365008845, | |
| "eval_loss": 6.3535308837890625, | |
| "eval_runtime": 145.9161, | |
| "eval_samples_per_second": 164.855, | |
| "eval_steps_per_second": 5.154, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 0.000398074015247566, | |
| "loss": 6.2501, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 0.0003978865522306392, | |
| "loss": 6.2436, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 0.0003976904330148543, | |
| "loss": 6.2418, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 6.04, | |
| "learning_rate": 0.00039748566617910113, | |
| "loss": 6.6426, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 0.0003972722606805445, | |
| "loss": 6.1972, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "eval_accuracy": 0.15188271193711186, | |
| "eval_loss": 6.332435607910156, | |
| "eval_runtime": 146.0198, | |
| "eval_samples_per_second": 164.738, | |
| "eval_steps_per_second": 5.15, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 0.00039705022585423216, | |
| "loss": 6.2183, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 6.66, | |
| "learning_rate": 0.0003968195714126868, | |
| "loss": 6.1899, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "learning_rate": 0.00039658030744548075, | |
| "loss": 6.192, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 0.0003963324444187952, | |
| "loss": 6.5971, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 0.0003960759931749619, | |
| "loss": 6.1685, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "eval_accuracy": 0.15276707185574287, | |
| "eval_loss": 6.302943706512451, | |
| "eval_runtime": 145.9601, | |
| "eval_samples_per_second": 164.805, | |
| "eval_steps_per_second": 5.152, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 0.00039581096493198893, | |
| "loss": 6.1653, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 7.7, | |
| "learning_rate": 0.0003955373712830703, | |
| "loss": 6.1623, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 0.00039525522419607854, | |
| "loss": 6.1604, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 8.12, | |
| "learning_rate": 0.0003949645360130412, | |
| "loss": 6.5496, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "learning_rate": 0.0003946653194496012, | |
| "loss": 6.1302, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 8.33, | |
| "eval_accuracy": 0.152128546451089, | |
| "eval_loss": 6.2827558517456055, | |
| "eval_runtime": 145.9935, | |
| "eval_samples_per_second": 164.768, | |
| "eval_steps_per_second": 5.151, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 0.00039435758759446025, | |
| "loss": 6.1368, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 8.74, | |
| "learning_rate": 0.00039404135390880664, | |
| "loss": 6.1171, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 0.0003937166322257262, | |
| "loss": 6.1463, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 0.00039338343674959745, | |
| "loss": 6.537, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "learning_rate": 0.00039304178205546976, | |
| "loss": 6.093, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 9.37, | |
| "eval_accuracy": 0.15364162638834264, | |
| "eval_loss": 6.256844520568848, | |
| "eval_runtime": 146.039, | |
| "eval_samples_per_second": 164.716, | |
| "eval_steps_per_second": 5.149, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 0.00039269168308842634, | |
| "loss": 6.0973, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 0.00039233315516293006, | |
| "loss": 6.1012, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 0.00039196621396215403, | |
| "loss": 6.0809, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 10.21, | |
| "learning_rate": 0.000391590875537295, | |
| "loss": 6.4765, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "learning_rate": 0.00039120715630687155, | |
| "loss": 6.0543, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "eval_accuracy": 0.15444620739515735, | |
| "eval_loss": 6.24298620223999, | |
| "eval_runtime": 145.9243, | |
| "eval_samples_per_second": 164.846, | |
| "eval_steps_per_second": 5.153, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 10.62, | |
| "learning_rate": 0.000390815073056006, | |
| "loss": 6.0777, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 10.82, | |
| "learning_rate": 0.00039041464293568983, | |
| "loss": 6.0697, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 11.04, | |
| "learning_rate": 0.00039000588346203374, | |
| "loss": 6.4636, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 11.25, | |
| "learning_rate": 0.0003895888125155014, | |
| "loss": 6.0487, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "learning_rate": 0.00038916344834012695, | |
| "loss": 6.0479, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 11.45, | |
| "eval_accuracy": 0.1541217862327054, | |
| "eval_loss": 6.234572887420654, | |
| "eval_runtime": 145.8799, | |
| "eval_samples_per_second": 164.896, | |
| "eval_steps_per_second": 5.155, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 11.66, | |
| "learning_rate": 0.00038872980954271757, | |
| "loss": 6.0617, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 11.86, | |
| "learning_rate": 0.00038828791509203895, | |
| "loss": 6.0441, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 12.08, | |
| "learning_rate": 0.00038783778431798597, | |
| "loss": 6.4461, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 12.29, | |
| "learning_rate": 0.0003873794369107369, | |
| "loss": 6.0258, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 12.49, | |
| "learning_rate": 0.0003869128929198922, | |
| "loss": 6.0372, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 12.49, | |
| "eval_accuracy": 0.1545538581772011, | |
| "eval_loss": 6.223215103149414, | |
| "eval_runtime": 145.9665, | |
| "eval_samples_per_second": 164.798, | |
| "eval_steps_per_second": 5.152, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 12.7, | |
| "learning_rate": 0.0003864381727535973, | |
| "loss": 6.0353, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "learning_rate": 0.00038595529717765027, | |
| "loss": 6.041, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 13.12, | |
| "learning_rate": 0.0003854642873145931, | |
| "loss": 6.4207, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 0.00038496516464278776, | |
| "loss": 6.006, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 13.53, | |
| "learning_rate": 0.00038445795099547697, | |
| "loss": 6.0127, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 13.53, | |
| "eval_accuracy": 0.15411265298876436, | |
| "eval_loss": 6.213912010192871, | |
| "eval_runtime": 145.9328, | |
| "eval_samples_per_second": 164.836, | |
| "eval_steps_per_second": 5.153, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 13.74, | |
| "learning_rate": 0.0003839426685598287, | |
| "loss": 6.0006, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "learning_rate": 0.000383419339875966, | |
| "loss": 6.0152, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 14.16, | |
| "learning_rate": 0.00038288798783598087, | |
| "loss": 6.3908, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "learning_rate": 0.0003823486356829329, | |
| "loss": 5.9744, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 14.58, | |
| "learning_rate": 0.0003818013070098325, | |
| "loss": 5.968, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 14.58, | |
| "eval_accuracy": 0.15472111663693397, | |
| "eval_loss": 6.20527458190918, | |
| "eval_runtime": 145.9446, | |
| "eval_samples_per_second": 164.823, | |
| "eval_steps_per_second": 5.153, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "learning_rate": 0.0003812460257586089, | |
| "loss": 5.9813, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "learning_rate": 0.000380682816219063, | |
| "loss": 6.0108, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 15.21, | |
| "learning_rate": 0.00038011170302780446, | |
| "loss": 6.3495, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 15.41, | |
| "learning_rate": 0.00037953271116717444, | |
| "loss": 5.9708, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 15.62, | |
| "learning_rate": 0.0003789458659641527, | |
| "loss": 5.9635, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 15.62, | |
| "eval_accuracy": 0.15486276242328167, | |
| "eval_loss": 6.199557781219482, | |
| "eval_runtime": 145.9791, | |
| "eval_samples_per_second": 164.784, | |
| "eval_steps_per_second": 5.151, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 15.82, | |
| "learning_rate": 0.0003783511930892495, | |
| "loss": 5.9756, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 16.04, | |
| "learning_rate": 0.00037774871855538275, | |
| "loss": 6.3631, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "learning_rate": 0.00037713846871674045, | |
| "loss": 5.9497, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "learning_rate": 0.0003765204702676274, | |
| "loss": 5.9433, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 16.66, | |
| "learning_rate": 0.0003758947502412978, | |
| "loss": 5.9479, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 16.66, | |
| "eval_accuracy": 0.15478355696794033, | |
| "eval_loss": 6.195274353027344, | |
| "eval_runtime": 145.939, | |
| "eval_samples_per_second": 164.829, | |
| "eval_steps_per_second": 5.153, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 16.86, | |
| "learning_rate": 0.0003752613360087727, | |
| "loss": 5.9614, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 17.08, | |
| "learning_rate": 0.00037462025527764265, | |
| "loss": 6.326, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 17.29, | |
| "learning_rate": 0.00037397153609085553, | |
| "loss": 5.9293, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 17.49, | |
| "learning_rate": 0.0003733152068254901, | |
| "loss": 5.9305, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 17.7, | |
| "learning_rate": 0.00037265129619151483, | |
| "loss": 5.9371, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 17.7, | |
| "eval_accuracy": 0.15451778319531595, | |
| "eval_loss": 6.1887054443359375, | |
| "eval_runtime": 145.8431, | |
| "eval_samples_per_second": 164.938, | |
| "eval_steps_per_second": 5.156, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "learning_rate": 0.00037197983323053143, | |
| "loss": 5.9348, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 18.12, | |
| "learning_rate": 0.00037130084731450515, | |
| "loss": 6.2994, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 18.33, | |
| "learning_rate": 0.0003706143681444795, | |
| "loss": 5.8969, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 18.53, | |
| "learning_rate": 0.0003699204257492774, | |
| "loss": 5.9219, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 18.74, | |
| "learning_rate": 0.0003692190504841871, | |
| "loss": 5.9046, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 18.74, | |
| "eval_accuracy": 0.1545486653674884, | |
| "eval_loss": 6.161332130432129, | |
| "eval_runtime": 145.9406, | |
| "eval_samples_per_second": 164.827, | |
| "eval_steps_per_second": 5.153, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 18.95, | |
| "learning_rate": 0.00036851027302963493, | |
| "loss": 5.9011, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 19.16, | |
| "learning_rate": 0.00036779412438984294, | |
| "loss": 6.2593, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 19.37, | |
| "learning_rate": 0.0003670706358914725, | |
| "loss": 5.8755, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 19.58, | |
| "learning_rate": 0.0003663398391822543, | |
| "loss": 5.8396, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 19.78, | |
| "learning_rate": 0.00036560176622960403, | |
| "loss": 5.8368, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 19.78, | |
| "eval_accuracy": 0.15570189218059025, | |
| "eval_loss": 6.095159530639648, | |
| "eval_runtime": 145.7599, | |
| "eval_samples_per_second": 165.032, | |
| "eval_steps_per_second": 5.159, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 19.99, | |
| "learning_rate": 0.00036485644931922353, | |
| "loss": 5.8184, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 20.21, | |
| "learning_rate": 0.0003641039210536889, | |
| "loss": 6.1866, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 20.41, | |
| "learning_rate": 0.0003633442143510245, | |
| "loss": 5.7848, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 20.62, | |
| "learning_rate": 0.00036257736244326246, | |
| "loss": 5.7807, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 20.82, | |
| "learning_rate": 0.0003618033988749895, | |
| "loss": 5.7914, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 20.82, | |
| "eval_accuracy": 0.15694020859066315, | |
| "eval_loss": 6.032991409301758, | |
| "eval_runtime": 145.9881, | |
| "eval_samples_per_second": 164.774, | |
| "eval_steps_per_second": 5.151, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 21.04, | |
| "learning_rate": 0.0003610223575018795, | |
| "loss": 6.1552, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 21.25, | |
| "learning_rate": 0.00036023427248921215, | |
| "loss": 5.7428, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 21.45, | |
| "learning_rate": 0.0003594391783103792, | |
| "loss": 5.7276, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 21.66, | |
| "learning_rate": 0.00035863710974537563, | |
| "loss": 5.7289, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 21.86, | |
| "learning_rate": 0.00035782810187927875, | |
| "loss": 5.7026, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 21.86, | |
| "eval_accuracy": 0.16123595961673237, | |
| "eval_loss": 5.942953109741211, | |
| "eval_runtime": 145.9911, | |
| "eval_samples_per_second": 164.77, | |
| "eval_steps_per_second": 5.151, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 22.08, | |
| "learning_rate": 0.0003570121901007136, | |
| "loss": 6.0423, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 22.29, | |
| "learning_rate": 0.0003561894101003044, | |
| "loss": 5.6495, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 22.49, | |
| "learning_rate": 0.00035535979786911396, | |
| "loss": 5.6223, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 22.7, | |
| "learning_rate": 0.00035452338969706876, | |
| "loss": 5.5675, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 22.9, | |
| "learning_rate": 0.00035368022217137184, | |
| "loss": 5.491, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 22.9, | |
| "eval_accuracy": 0.19736824293775215, | |
| "eval_loss": 5.609994888305664, | |
| "eval_runtime": 146.0961, | |
| "eval_samples_per_second": 164.652, | |
| "eval_steps_per_second": 5.147, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 23.12, | |
| "learning_rate": 0.00035283033217490227, | |
| "loss": 5.6961, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 23.33, | |
| "learning_rate": 0.00035197375688460176, | |
| "loss": 5.239, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 23.53, | |
| "learning_rate": 0.0003511105337698484, | |
| "loss": 5.1252, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 23.74, | |
| "learning_rate": 0.0003502407005908177, | |
| "loss": 5.0182, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 23.95, | |
| "learning_rate": 0.0003493642953968308, | |
| "loss": 4.9289, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 23.95, | |
| "eval_accuracy": 0.27019214299497635, | |
| "eval_loss": 4.960735321044922, | |
| "eval_runtime": 146.1516, | |
| "eval_samples_per_second": 164.589, | |
| "eval_steps_per_second": 5.145, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 24.16, | |
| "learning_rate": 0.00034848135652469, | |
| "loss": 5.1346, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 24.37, | |
| "learning_rate": 0.00034759192259700196, | |
| "loss": 4.7377, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 24.58, | |
| "learning_rate": 0.000346696032520488, | |
| "loss": 4.6538, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 24.78, | |
| "learning_rate": 0.00034579372548428235, | |
| "loss": 4.608, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "learning_rate": 0.00034488504095821784, | |
| "loss": 4.5214, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "eval_accuracy": 0.3050591252908655, | |
| "eval_loss": 4.579548358917236, | |
| "eval_runtime": 146.015, | |
| "eval_samples_per_second": 164.743, | |
| "eval_steps_per_second": 5.15, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 25.21, | |
| "learning_rate": 0.0003439700186910993, | |
| "loss": 4.7508, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 25.41, | |
| "learning_rate": 0.00034304869870896513, | |
| "loss": 4.4132, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 25.62, | |
| "learning_rate": 0.00034212112131333587, | |
| "loss": 4.3489, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 25.82, | |
| "learning_rate": 0.0003411873270794518, | |
| "loss": 4.3454, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 26.04, | |
| "learning_rate": 0.00034024735685449773, | |
| "loss": 4.5663, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 26.04, | |
| "eval_accuracy": 0.32645309469898054, | |
| "eval_loss": 4.345365047454834, | |
| "eval_runtime": 146.0915, | |
| "eval_samples_per_second": 164.657, | |
| "eval_steps_per_second": 5.147, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 26.25, | |
| "learning_rate": 0.00033930125175581647, | |
| "loss": 4.2188, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 26.45, | |
| "learning_rate": 0.0003383490531691099, | |
| "loss": 4.1928, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 26.66, | |
| "learning_rate": 0.0003373908027466289, | |
| "loss": 4.1575, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "learning_rate": 0.00033642654240535134, | |
| "loss": 4.1106, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 27.08, | |
| "learning_rate": 0.00033545631432514825, | |
| "loss": 4.3717, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 27.08, | |
| "eval_accuracy": 0.3412254938630985, | |
| "eval_loss": 4.1738104820251465, | |
| "eval_runtime": 145.9707, | |
| "eval_samples_per_second": 164.793, | |
| "eval_steps_per_second": 5.152, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 27.29, | |
| "learning_rate": 0.00033448016094693895, | |
| "loss": 4.007, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 27.49, | |
| "learning_rate": 0.0003334981249708345, | |
| "loss": 4.003, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 27.7, | |
| "learning_rate": 0.00033251024935427, | |
| "loss": 3.9491, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 27.9, | |
| "learning_rate": 0.0003315165773101249, | |
| "loss": 3.9411, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 28.12, | |
| "learning_rate": 0.00033051715230483374, | |
| "loss": 4.1483, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 28.12, | |
| "eval_accuracy": 0.35552299245507185, | |
| "eval_loss": 4.033575534820557, | |
| "eval_runtime": 145.9738, | |
| "eval_samples_per_second": 164.79, | |
| "eval_steps_per_second": 5.152, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 28.33, | |
| "learning_rate": 0.0003295120180564838, | |
| "loss": 3.8395, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 28.53, | |
| "learning_rate": 0.00032850121853290334, | |
| "loss": 3.8271, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 28.74, | |
| "learning_rate": 0.000327484797949738, | |
| "loss": 3.8272, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 28.95, | |
| "learning_rate": 0.00032646280076851684, | |
| "loss": 3.7855, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 29.16, | |
| "learning_rate": 0.0003254352716947074, | |
| "loss": 3.9988, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 29.16, | |
| "eval_accuracy": 0.3677331361148426, | |
| "eval_loss": 3.91800594329834, | |
| "eval_runtime": 146.056, | |
| "eval_samples_per_second": 164.697, | |
| "eval_steps_per_second": 5.149, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 29.37, | |
| "learning_rate": 0.0003244022556757602, | |
| "loss": 3.7379, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 29.58, | |
| "learning_rate": 0.0003233637978991422, | |
| "loss": 3.6974, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 29.78, | |
| "learning_rate": 0.00032231994379036086, | |
| "loss": 3.6966, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 29.99, | |
| "learning_rate": 0.0003212707390109765, | |
| "loss": 3.6594, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 30.21, | |
| "learning_rate": 0.00032021622945660504, | |
| "loss": 3.8695, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 30.21, | |
| "eval_accuracy": 0.37818666192863265, | |
| "eval_loss": 3.81080961227417, | |
| "eval_runtime": 146.0723, | |
| "eval_samples_per_second": 164.679, | |
| "eval_steps_per_second": 5.148, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 30.41, | |
| "learning_rate": 0.0003191564612549106, | |
| "loss": 3.598, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 30.62, | |
| "learning_rate": 0.0003180914807635874, | |
| "loss": 3.5942, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 30.82, | |
| "learning_rate": 0.00031702133456833236, | |
| "loss": 3.585, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 31.04, | |
| "learning_rate": 0.00031594606948080663, | |
| "loss": 3.7908, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "learning_rate": 0.00031486573253658874, | |
| "loss": 3.5017, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 31.25, | |
| "eval_accuracy": 0.3878577124364749, | |
| "eval_loss": 3.7240185737609863, | |
| "eval_runtime": 145.8744, | |
| "eval_samples_per_second": 164.902, | |
| "eval_steps_per_second": 5.155, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 31.45, | |
| "learning_rate": 0.00031378037099311627, | |
| "loss": 3.5206, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 31.66, | |
| "learning_rate": 0.00031269003232761933, | |
| "loss": 3.5049, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 31.86, | |
| "learning_rate": 0.0003115947642350433, | |
| "loss": 3.4852, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 32.08, | |
| "learning_rate": 0.00031049461462596267, | |
| "loss": 3.6894, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 32.29, | |
| "learning_rate": 0.00030938963162448544, | |
| "loss": 3.4311, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 32.29, | |
| "eval_accuracy": 0.3973612765821424, | |
| "eval_loss": 3.6425790786743164, | |
| "eval_runtime": 146.1194, | |
| "eval_samples_per_second": 164.626, | |
| "eval_steps_per_second": 5.146, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 32.49, | |
| "learning_rate": 0.0003082798635661476, | |
| "loss": 3.4258, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 32.7, | |
| "learning_rate": 0.0003071653589957993, | |
| "loss": 3.4076, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 32.9, | |
| "learning_rate": 0.000306046166665481, | |
| "loss": 3.4117, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 33.12, | |
| "learning_rate": 0.00030492233553229076, | |
| "loss": 3.5985, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 33.33, | |
| "learning_rate": 0.00030379391475624304, | |
| "loss": 3.3517, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 33.33, | |
| "eval_accuracy": 0.40682330082568596, | |
| "eval_loss": 3.5615479946136475, | |
| "eval_runtime": 146.0666, | |
| "eval_samples_per_second": 164.685, | |
| "eval_steps_per_second": 5.148, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 33.53, | |
| "learning_rate": 0.0003026609536981183, | |
| "loss": 3.3431, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 33.74, | |
| "learning_rate": 0.0003015235019173034, | |
| "loss": 3.3546, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 33.95, | |
| "learning_rate": 0.00030038160916962404, | |
| "loss": 3.3378, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 34.16, | |
| "learning_rate": 0.00029923532540516843, | |
| "loss": 3.5305, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 34.37, | |
| "learning_rate": 0.00029808470076610167, | |
| "loss": 3.2856, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 34.37, | |
| "eval_accuracy": 0.41555171151451314, | |
| "eval_loss": 3.4914703369140625, | |
| "eval_runtime": 146.1721, | |
| "eval_samples_per_second": 164.566, | |
| "eval_steps_per_second": 5.145, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 34.58, | |
| "learning_rate": 0.00029692978558447305, | |
| "loss": 3.273, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 34.78, | |
| "learning_rate": 0.0002957706303800139, | |
| "loss": 3.278, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 34.99, | |
| "learning_rate": 0.0002946072858579282, | |
| "loss": 3.2614, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 35.21, | |
| "learning_rate": 0.0002934398029066739, | |
| "loss": 3.4456, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 35.41, | |
| "learning_rate": 0.0002922682325957376, | |
| "loss": 3.227, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 35.41, | |
| "eval_accuracy": 0.4255488250901363, | |
| "eval_loss": 3.41792893409729, | |
| "eval_runtime": 146.0068, | |
| "eval_samples_per_second": 164.753, | |
| "eval_steps_per_second": 5.15, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 35.62, | |
| "learning_rate": 0.00029109262617339987, | |
| "loss": 3.1995, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 35.82, | |
| "learning_rate": 0.0002899130350644941, | |
| "loss": 3.2058, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 36.04, | |
| "learning_rate": 0.00028872951086815685, | |
| "loss": 3.4183, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 36.25, | |
| "learning_rate": 0.00028754210535557036, | |
| "loss": 3.1514, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 36.45, | |
| "learning_rate": 0.00028635087046769857, | |
| "loss": 3.1675, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 36.45, | |
| "eval_accuracy": 0.43245804160401624, | |
| "eval_loss": 3.3635590076446533, | |
| "eval_runtime": 146.1639, | |
| "eval_samples_per_second": 164.575, | |
| "eval_steps_per_second": 5.145, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 36.66, | |
| "learning_rate": 0.00028515585831301456, | |
| "loss": 3.1645, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 36.86, | |
| "learning_rate": 0.0002839571211652212, | |
| "loss": 3.1617, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 37.08, | |
| "learning_rate": 0.00028275471146096466, | |
| "loss": 3.3333, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 37.29, | |
| "learning_rate": 0.00028154868179754074, | |
| "loss": 3.1167, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 37.49, | |
| "learning_rate": 0.0002803390849305939, | |
| "loss": 3.0908, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 37.49, | |
| "eval_accuracy": 0.43940471782078516, | |
| "eval_loss": 3.30828595161438, | |
| "eval_runtime": 146.1043, | |
| "eval_samples_per_second": 164.643, | |
| "eval_steps_per_second": 5.147, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 37.7, | |
| "learning_rate": 0.0002791259737718097, | |
| "loss": 3.1214, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 37.9, | |
| "learning_rate": 0.0002779094013866001, | |
| "loss": 3.0987, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 38.12, | |
| "learning_rate": 0.00027668942099178234, | |
| "loss": 3.2767, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 38.33, | |
| "learning_rate": 0.00027546608595325117, | |
| "loss": 3.0716, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 38.53, | |
| "learning_rate": 0.00027423944978364416, | |
| "loss": 3.0561, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 38.53, | |
| "eval_accuracy": 0.44727625227121054, | |
| "eval_loss": 3.25723934173584, | |
| "eval_runtime": 145.9616, | |
| "eval_samples_per_second": 164.804, | |
| "eval_steps_per_second": 5.152, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 38.74, | |
| "learning_rate": 0.00027300956614000115, | |
| "loss": 3.0564, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 38.95, | |
| "learning_rate": 0.00027177648882141704, | |
| "loss": 3.0583, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 39.16, | |
| "learning_rate": 0.0002705402717666883, | |
| "loss": 3.2319, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 39.37, | |
| "learning_rate": 0.00026930096905195363, | |
| "loss": 3.0204, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 39.58, | |
| "learning_rate": 0.00026805863488832865, | |
| "loss": 3.0139, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 39.58, | |
| "eval_accuracy": 0.4525324485267982, | |
| "eval_loss": 3.215851306915283, | |
| "eval_runtime": 146.1327, | |
| "eval_samples_per_second": 164.611, | |
| "eval_steps_per_second": 5.146, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 39.78, | |
| "learning_rate": 0.00026681332361953424, | |
| "loss": 3.0053, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 39.99, | |
| "learning_rate": 0.0002655650897195195, | |
| "loss": 3.0171, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 40.21, | |
| "learning_rate": 0.0002643139877900791, | |
| "loss": 3.1749, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 40.41, | |
| "learning_rate": 0.00026306007255846436, | |
| "loss": 2.9764, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 40.62, | |
| "learning_rate": 0.00026180339887498953, | |
| "loss": 2.9837, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 40.62, | |
| "eval_accuracy": 0.45754832554207525, | |
| "eval_loss": 3.1789309978485107, | |
| "eval_runtime": 146.1778, | |
| "eval_samples_per_second": 164.56, | |
| "eval_steps_per_second": 5.144, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 40.82, | |
| "learning_rate": 0.00026054402171063267, | |
| "loss": 2.9752, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 41.04, | |
| "learning_rate": 0.0002592819961546308, | |
| "loss": 3.1648, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 41.25, | |
| "learning_rate": 0.00025801737741207005, | |
| "loss": 2.9438, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 41.45, | |
| "learning_rate": 0.000256750220801471, | |
| "loss": 2.941, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 41.66, | |
| "learning_rate": 0.0002554805817523689, | |
| "loss": 2.9387, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 41.66, | |
| "eval_accuracy": 0.46179467604077673, | |
| "eval_loss": 3.1430864334106445, | |
| "eval_runtime": 146.0529, | |
| "eval_samples_per_second": 164.701, | |
| "eval_steps_per_second": 5.149, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 41.86, | |
| "learning_rate": 0.0002542085158028889, | |
| "loss": 2.9371, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 42.08, | |
| "learning_rate": 0.00025293407859731633, | |
| "loss": 3.1085, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 42.29, | |
| "learning_rate": 0.00025165732588366334, | |
| "loss": 2.8999, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 42.49, | |
| "learning_rate": 0.00025037831351122967, | |
| "loss": 2.9159, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 42.7, | |
| "learning_rate": 0.0002490970974281599, | |
| "loss": 2.9034, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 42.7, | |
| "eval_accuracy": 0.46535935335872575, | |
| "eval_loss": 3.116283655166626, | |
| "eval_runtime": 146.1195, | |
| "eval_samples_per_second": 164.626, | |
| "eval_steps_per_second": 5.146, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 42.9, | |
| "learning_rate": 0.00024781373367899597, | |
| "loss": 2.8936, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 43.12, | |
| "learning_rate": 0.00024652827840222606, | |
| "loss": 3.0697, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 43.33, | |
| "learning_rate": 0.00024524078782782807, | |
| "loss": 2.8913, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 43.53, | |
| "learning_rate": 0.00024395131827481062, | |
| "loss": 2.8624, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 43.74, | |
| "learning_rate": 0.0002426599261487494, | |
| "loss": 2.8822, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 43.74, | |
| "eval_accuracy": 0.46941429535485324, | |
| "eval_loss": 3.0841524600982666, | |
| "eval_runtime": 146.1268, | |
| "eval_samples_per_second": 164.617, | |
| "eval_steps_per_second": 5.146, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 43.95, | |
| "learning_rate": 0.00024136666793931935, | |
| "loss": 2.8655, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 44.16, | |
| "learning_rate": 0.00024007160021782427, | |
| "loss": 3.0323, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 44.37, | |
| "learning_rate": 0.0002387747796347217, | |
| "loss": 2.8446, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 44.58, | |
| "learning_rate": 0.00023747626291714498, | |
| "loss": 2.8433, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 44.78, | |
| "learning_rate": 0.000236176106866422, | |
| "loss": 2.836, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 44.78, | |
| "eval_accuracy": 0.47268071006532664, | |
| "eval_loss": 3.0583226680755615, | |
| "eval_runtime": 145.9982, | |
| "eval_samples_per_second": 164.762, | |
| "eval_steps_per_second": 5.151, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 44.99, | |
| "learning_rate": 0.00023487436835559035, | |
| "loss": 2.8457, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 45.21, | |
| "learning_rate": 0.00023357110432690954, | |
| "loss": 2.9941, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 45.41, | |
| "learning_rate": 0.00023226637178937022, | |
| "loss": 2.8208, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 45.62, | |
| "learning_rate": 0.00023096022781620034, | |
| "loss": 2.8154, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 45.82, | |
| "learning_rate": 0.0002296527295423684, | |
| "loss": 2.8129, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 45.82, | |
| "eval_accuracy": 0.47600857452342976, | |
| "eval_loss": 3.035902738571167, | |
| "eval_runtime": 145.9849, | |
| "eval_samples_per_second": 164.777, | |
| "eval_steps_per_second": 5.151, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 46.04, | |
| "learning_rate": 0.00022834393416208486, | |
| "loss": 2.9871, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 46.25, | |
| "learning_rate": 0.0002270338989262994, | |
| "loss": 2.7892, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 46.45, | |
| "learning_rate": 0.00022572268114019726, | |
| "loss": 2.7843, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 46.66, | |
| "learning_rate": 0.00022441033816069202, | |
| "loss": 2.7867, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 46.86, | |
| "learning_rate": 0.00022309692739391727, | |
| "loss": 2.7733, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 46.86, | |
| "eval_accuracy": 0.47764141406488453, | |
| "eval_loss": 3.017348051071167, | |
| "eval_runtime": 146.0338, | |
| "eval_samples_per_second": 164.722, | |
| "eval_steps_per_second": 5.149, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 47.08, | |
| "learning_rate": 0.00022178250629271452, | |
| "loss": 2.981, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 47.29, | |
| "learning_rate": 0.00022046713235412103, | |
| "loss": 2.7598, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 47.49, | |
| "learning_rate": 0.00021915086311685404, | |
| "loss": 2.7769, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 47.7, | |
| "learning_rate": 0.00021783375615879415, | |
| "loss": 2.7753, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 47.9, | |
| "learning_rate": 0.0002165158690944665, | |
| "loss": 2.7589, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 47.9, | |
| "eval_accuracy": 0.4811929413931917, | |
| "eval_loss": 2.9977798461914062, | |
| "eval_runtime": 146.0602, | |
| "eval_samples_per_second": 164.692, | |
| "eval_steps_per_second": 5.149, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 48.12, | |
| "learning_rate": 0.00021519725957252063, | |
| "loss": 2.9409, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 48.33, | |
| "learning_rate": 0.00021387798527320882, | |
| "loss": 2.7465, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 48.53, | |
| "learning_rate": 0.0002125581039058627, | |
| "loss": 2.7403, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 48.74, | |
| "learning_rate": 0.0002112376732063691, | |
| "loss": 2.7284, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 48.95, | |
| "learning_rate": 0.00020991675093464448, | |
| "loss": 2.7378, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 48.95, | |
| "eval_accuracy": 0.4831324380858166, | |
| "eval_loss": 2.9787769317626953, | |
| "eval_runtime": 146.0148, | |
| "eval_samples_per_second": 164.744, | |
| "eval_steps_per_second": 5.15, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 49.16, | |
| "learning_rate": 0.00020859539487210813, | |
| "loss": 2.9167, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 49.37, | |
| "learning_rate": 0.0002072736628191549, | |
| "loss": 2.7203, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 49.58, | |
| "learning_rate": 0.0002059516125926265, | |
| "loss": 2.7276, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 49.78, | |
| "learning_rate": 0.00020462930202328278, | |
| "loss": 2.7001, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 49.99, | |
| "learning_rate": 0.00020330678895327174, | |
| "loss": 2.7138, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 49.99, | |
| "eval_accuracy": 0.4843915093446441, | |
| "eval_loss": 2.967425584793091, | |
| "eval_runtime": 146.0929, | |
| "eval_samples_per_second": 164.655, | |
| "eval_steps_per_second": 5.147, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 50.21, | |
| "learning_rate": 0.00020198413123359926, | |
| "loss": 2.8865, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 50.41, | |
| "learning_rate": 0.00020066138672159903, | |
| "loss": 2.698, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 50.62, | |
| "learning_rate": 0.00019933861327840098, | |
| "loss": 2.6978, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 50.82, | |
| "learning_rate": 0.00019801586876640073, | |
| "loss": 2.704, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 51.04, | |
| "learning_rate": 0.0001966932110467283, | |
| "loss": 2.8692, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 51.04, | |
| "eval_accuracy": 0.4874163939573572, | |
| "eval_loss": 2.9475862979888916, | |
| "eval_runtime": 145.9737, | |
| "eval_samples_per_second": 164.79, | |
| "eval_steps_per_second": 5.152, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 51.25, | |
| "learning_rate": 0.00019537069797671724, | |
| "loss": 2.6734, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 51.45, | |
| "learning_rate": 0.0001940483874073735, | |
| "loss": 2.6636, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 51.66, | |
| "learning_rate": 0.00019272633718084517, | |
| "loss": 2.6756, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 51.86, | |
| "learning_rate": 0.0001914046051278919, | |
| "loss": 2.6808, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 52.08, | |
| "learning_rate": 0.00019008324906535554, | |
| "loss": 2.8462, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 52.08, | |
| "eval_accuracy": 0.48931343115405407, | |
| "eval_loss": 2.934227466583252, | |
| "eval_runtime": 145.9977, | |
| "eval_samples_per_second": 164.763, | |
| "eval_steps_per_second": 5.151, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 52.29, | |
| "learning_rate": 0.0001887623267936309, | |
| "loss": 2.6553, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 52.49, | |
| "learning_rate": 0.00018744189609413734, | |
| "loss": 2.6559, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 52.7, | |
| "learning_rate": 0.0001861220147267912, | |
| "loss": 2.6536, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 52.9, | |
| "learning_rate": 0.0001848027404274794, | |
| "loss": 2.6524, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 53.12, | |
| "learning_rate": 0.00018348413090553354, | |
| "loss": 2.8312, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 53.12, | |
| "eval_accuracy": 0.4900369570164547, | |
| "eval_loss": 2.9268674850463867, | |
| "eval_runtime": 146.0027, | |
| "eval_samples_per_second": 164.757, | |
| "eval_steps_per_second": 5.151, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 53.33, | |
| "learning_rate": 0.00018216624384120595, | |
| "loss": 2.6306, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 53.53, | |
| "learning_rate": 0.00018084913688314597, | |
| "loss": 2.6398, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 53.74, | |
| "learning_rate": 0.000179532867645879, | |
| "loss": 2.6318, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 53.95, | |
| "learning_rate": 0.0001782174937072855, | |
| "loss": 2.6358, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 54.16, | |
| "learning_rate": 0.00017690307260608278, | |
| "loss": 2.7834, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 54.16, | |
| "eval_accuracy": 0.4917280711401593, | |
| "eval_loss": 2.911123037338257, | |
| "eval_runtime": 146.0206, | |
| "eval_samples_per_second": 164.737, | |
| "eval_steps_per_second": 5.15, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 54.37, | |
| "learning_rate": 0.000175589661839308, | |
| "loss": 2.6226, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 54.58, | |
| "learning_rate": 0.00017427731885980282, | |
| "loss": 2.6183, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 54.78, | |
| "learning_rate": 0.0001729661010737007, | |
| "loss": 2.6313, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 54.99, | |
| "learning_rate": 0.00017165606583791515, | |
| "loss": 2.6366, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 55.21, | |
| "learning_rate": 0.00017034727045763158, | |
| "loss": 2.7822, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 55.21, | |
| "eval_accuracy": 0.4934482911572486, | |
| "eval_loss": 2.8986542224884033, | |
| "eval_runtime": 146.1152, | |
| "eval_samples_per_second": 164.63, | |
| "eval_steps_per_second": 5.147, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 55.41, | |
| "learning_rate": 0.00016903977218379974, | |
| "loss": 2.5985, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 55.62, | |
| "learning_rate": 0.00016773362821062983, | |
| "loss": 2.6059, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 55.82, | |
| "learning_rate": 0.00016642889567309048, | |
| "loss": 2.6083, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 56.04, | |
| "learning_rate": 0.0001651256316444097, | |
| "loss": 2.7793, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 56.25, | |
| "learning_rate": 0.0001638238931335781, | |
| "loss": 2.584, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 56.25, | |
| "eval_accuracy": 0.49487679829418024, | |
| "eval_loss": 2.8844311237335205, | |
| "eval_runtime": 145.9294, | |
| "eval_samples_per_second": 164.84, | |
| "eval_steps_per_second": 5.153, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 56.45, | |
| "learning_rate": 0.00016252373708285504, | |
| "loss": 2.5884, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 56.66, | |
| "learning_rate": 0.00016122522036527838, | |
| "loss": 2.5881, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 56.86, | |
| "learning_rate": 0.00015992839978217578, | |
| "loss": 2.5866, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 57.08, | |
| "learning_rate": 0.00015863333206068067, | |
| "loss": 2.7644, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 57.29, | |
| "learning_rate": 0.00015734007385125067, | |
| "loss": 2.5668, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 57.29, | |
| "eval_accuracy": 0.49651714759851406, | |
| "eval_loss": 2.880821704864502, | |
| "eval_runtime": 146.1597, | |
| "eval_samples_per_second": 164.58, | |
| "eval_steps_per_second": 5.145, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 57.49, | |
| "learning_rate": 0.0001560486817251894, | |
| "loss": 2.5728, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 57.7, | |
| "learning_rate": 0.000154759212172172, | |
| "loss": 2.5765, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 57.9, | |
| "learning_rate": 0.00015347172159777396, | |
| "loss": 2.5794, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 58.12, | |
| "learning_rate": 0.000152186266321004, | |
| "loss": 2.7342, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 58.33, | |
| "learning_rate": 0.0001509029025718402, | |
| "loss": 2.5536, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 58.33, | |
| "eval_accuracy": 0.4981620698137741, | |
| "eval_loss": 2.864001512527466, | |
| "eval_runtime": 146.123, | |
| "eval_samples_per_second": 164.622, | |
| "eval_steps_per_second": 5.146, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 58.53, | |
| "learning_rate": 0.0001496216864887704, | |
| "loss": 2.5466, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 58.74, | |
| "learning_rate": 0.00014834267411633674, | |
| "loss": 2.553, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 58.95, | |
| "learning_rate": 0.0001470659214026837, | |
| "loss": 2.5623, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 59.16, | |
| "learning_rate": 0.00014579148419711119, | |
| "loss": 2.727, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 59.37, | |
| "learning_rate": 0.00014451941824763113, | |
| "loss": 2.5403, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 59.37, | |
| "eval_accuracy": 0.49815218958648255, | |
| "eval_loss": 2.860569953918457, | |
| "eval_runtime": 146.132, | |
| "eval_samples_per_second": 164.611, | |
| "eval_steps_per_second": 5.146, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 59.58, | |
| "learning_rate": 0.000143249779198529, | |
| "loss": 2.5441, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 59.78, | |
| "learning_rate": 0.00014198262258793002, | |
| "loss": 2.5541, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 59.99, | |
| "learning_rate": 0.00014071800384536927, | |
| "loss": 2.5482, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 60.21, | |
| "learning_rate": 0.00013945597828936737, | |
| "loss": 2.6878, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 60.41, | |
| "learning_rate": 0.00013819660112501054, | |
| "loss": 2.5294, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 60.41, | |
| "eval_accuracy": 0.5007705653773009, | |
| "eval_loss": 2.8440916538238525, | |
| "eval_runtime": 146.0675, | |
| "eval_samples_per_second": 164.684, | |
| "eval_steps_per_second": 5.148, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 60.62, | |
| "learning_rate": 0.00013693992744153572, | |
| "loss": 2.5448, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 60.82, | |
| "learning_rate": 0.00013568601220992097, | |
| "loss": 2.5435, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 61.04, | |
| "learning_rate": 0.00013443491028048045, | |
| "loss": 2.71, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 61.25, | |
| "learning_rate": 0.0001331866763804658, | |
| "loss": 2.5199, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 61.45, | |
| "learning_rate": 0.0001319413651116714, | |
| "loss": 2.513, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 61.45, | |
| "eval_accuracy": 0.5013016714921779, | |
| "eval_loss": 2.840217113494873, | |
| "eval_runtime": 146.0072, | |
| "eval_samples_per_second": 164.752, | |
| "eval_steps_per_second": 5.15, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 61.66, | |
| "learning_rate": 0.00013069903094804644, | |
| "loss": 2.5158, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 61.86, | |
| "learning_rate": 0.0001294597282333118, | |
| "loss": 2.5292, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 62.08, | |
| "learning_rate": 0.00012822351117858303, | |
| "loss": 2.6752, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 62.29, | |
| "learning_rate": 0.0001269904338599989, | |
| "loss": 2.5094, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 62.49, | |
| "learning_rate": 0.0001257605502163558, | |
| "loss": 2.5105, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 62.49, | |
| "eval_accuracy": 0.5022339398713631, | |
| "eval_loss": 2.8315513134002686, | |
| "eval_runtime": 146.1095, | |
| "eval_samples_per_second": 164.637, | |
| "eval_steps_per_second": 5.147, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 62.7, | |
| "learning_rate": 0.00012453391404674885, | |
| "loss": 2.4981, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 62.9, | |
| "learning_rate": 0.00012331057900821768, | |
| "loss": 2.5072, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 63.12, | |
| "learning_rate": 0.0001220905986134, | |
| "loss": 2.6561, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 63.33, | |
| "learning_rate": 0.00012087402622819039, | |
| "loss": 2.5062, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 63.53, | |
| "learning_rate": 0.00011966091506940616, | |
| "loss": 2.4897, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 63.53, | |
| "eval_accuracy": 0.502685487439774, | |
| "eval_loss": 2.823685646057129, | |
| "eval_runtime": 146.1084, | |
| "eval_samples_per_second": 164.638, | |
| "eval_steps_per_second": 5.147, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 63.74, | |
| "learning_rate": 0.00011845131820245934, | |
| "loss": 2.4945, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 63.95, | |
| "learning_rate": 0.00011724528853903536, | |
| "loss": 2.5023, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 64.16, | |
| "learning_rate": 0.00011604287883477889, | |
| "loss": 2.637, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 64.37, | |
| "learning_rate": 0.00011484414168698547, | |
| "loss": 2.4841, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 64.58, | |
| "learning_rate": 0.00011364912953230145, | |
| "loss": 2.4974, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 64.58, | |
| "eval_accuracy": 0.5039655187362361, | |
| "eval_loss": 2.818704605102539, | |
| "eval_runtime": 146.0534, | |
| "eval_samples_per_second": 164.7, | |
| "eval_steps_per_second": 5.149, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 64.78, | |
| "learning_rate": 0.00011245789464442964, | |
| "loss": 2.496, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 64.99, | |
| "learning_rate": 0.00011127048913184326, | |
| "loss": 2.4902, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 65.21, | |
| "learning_rate": 0.00011008696493550599, | |
| "loss": 2.6366, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 65.41, | |
| "learning_rate": 0.00010890737382660015, | |
| "loss": 2.4739, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 65.62, | |
| "learning_rate": 0.00010773176740426248, | |
| "loss": 2.4799, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 65.62, | |
| "eval_accuracy": 0.5044451239477096, | |
| "eval_loss": 2.8128514289855957, | |
| "eval_runtime": 146.0215, | |
| "eval_samples_per_second": 164.736, | |
| "eval_steps_per_second": 5.15, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 65.82, | |
| "learning_rate": 0.00010656019709332606, | |
| "loss": 2.4707, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 66.04, | |
| "learning_rate": 0.00010539271414207186, | |
| "loss": 2.6249, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 66.25, | |
| "learning_rate": 0.00010422936961998609, | |
| "loss": 2.4617, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 66.45, | |
| "learning_rate": 0.00010307021441552707, | |
| "loss": 2.4508, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 66.66, | |
| "learning_rate": 0.00010191529923389845, | |
| "loss": 2.4741, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 66.66, | |
| "eval_accuracy": 0.5057173793056381, | |
| "eval_loss": 2.805563449859619, | |
| "eval_runtime": 146.0069, | |
| "eval_samples_per_second": 164.752, | |
| "eval_steps_per_second": 5.15, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 66.86, | |
| "learning_rate": 0.00010076467459483155, | |
| "loss": 2.4658, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 67.08, | |
| "learning_rate": 9.961839083037592e-05, | |
| "loss": 2.6267, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 67.29, | |
| "learning_rate": 9.847649808269658e-05, | |
| "loss": 2.4656, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 67.49, | |
| "learning_rate": 9.733904630188176e-05, | |
| "loss": 2.4421, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 67.7, | |
| "learning_rate": 9.620608524375703e-05, | |
| "loss": 2.4582, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 67.7, | |
| "eval_accuracy": 0.506052237287108, | |
| "eval_loss": 2.80246639251709, | |
| "eval_runtime": 145.9985, | |
| "eval_samples_per_second": 164.762, | |
| "eval_steps_per_second": 5.151, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 67.9, | |
| "learning_rate": 9.507766446770934e-05, | |
| "loss": 2.456, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 68.12, | |
| "learning_rate": 9.39538333345191e-05, | |
| "loss": 2.6204, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 68.33, | |
| "learning_rate": 9.283464100420063e-05, | |
| "loss": 2.4513, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 68.53, | |
| "learning_rate": 9.17201364338524e-05, | |
| "loss": 2.4486, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 68.74, | |
| "learning_rate": 9.061036837551466e-05, | |
| "loss": 2.4389, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 68.74, | |
| "eval_accuracy": 0.5075605292045352, | |
| "eval_loss": 2.791304111480713, | |
| "eval_runtime": 146.0353, | |
| "eval_samples_per_second": 164.72, | |
| "eval_steps_per_second": 5.149, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 68.95, | |
| "learning_rate": 8.950538537403736e-05, | |
| "loss": 2.4384, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 69.16, | |
| "learning_rate": 8.840523576495681e-05, | |
| "loss": 2.5977, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 69.37, | |
| "learning_rate": 8.730996767238072e-05, | |
| "loss": 2.4459, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 69.58, | |
| "learning_rate": 8.621962900688378e-05, | |
| "loss": 2.4281, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 69.78, | |
| "learning_rate": 8.513426746341128e-05, | |
| "loss": 2.4539, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 69.78, | |
| "eval_accuracy": 0.5071934293322717, | |
| "eval_loss": 2.7881319522857666, | |
| "eval_runtime": 145.9867, | |
| "eval_samples_per_second": 164.775, | |
| "eval_steps_per_second": 5.151, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 69.99, | |
| "learning_rate": 8.405393051919333e-05, | |
| "loss": 2.4298, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 70.21, | |
| "learning_rate": 8.29786654316677e-05, | |
| "loss": 2.5885, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 70.41, | |
| "learning_rate": 8.190851923641259e-05, | |
| "loss": 2.4073, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 70.62, | |
| "learning_rate": 8.084353874508947e-05, | |
| "loss": 2.4379, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 70.82, | |
| "learning_rate": 7.978377054339499e-05, | |
| "loss": 2.4252, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 70.82, | |
| "eval_accuracy": 0.5081794918909719, | |
| "eval_loss": 2.7884321212768555, | |
| "eval_runtime": 146.1195, | |
| "eval_samples_per_second": 164.626, | |
| "eval_steps_per_second": 5.146, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 71.04, | |
| "learning_rate": 7.872926098902358e-05, | |
| "loss": 2.5932, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 71.25, | |
| "learning_rate": 7.768005620963916e-05, | |
| "loss": 2.4153, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 71.45, | |
| "learning_rate": 7.663620210085781e-05, | |
| "loss": 2.4195, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 71.66, | |
| "learning_rate": 7.55977443242399e-05, | |
| "loss": 2.4231, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 71.86, | |
| "learning_rate": 7.456472830529259e-05, | |
| "loss": 2.4287, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 71.86, | |
| "eval_accuracy": 0.5093288685486723, | |
| "eval_loss": 2.778383493423462, | |
| "eval_runtime": 145.9882, | |
| "eval_samples_per_second": 164.774, | |
| "eval_steps_per_second": 5.151, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 72.08, | |
| "learning_rate": 7.353719923148324e-05, | |
| "loss": 2.5804, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 72.29, | |
| "learning_rate": 7.251520205026205e-05, | |
| "loss": 2.4048, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 72.49, | |
| "learning_rate": 7.149878146709676e-05, | |
| "loss": 2.4008, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 72.7, | |
| "learning_rate": 7.048798194351625e-05, | |
| "loss": 2.41, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 72.9, | |
| "learning_rate": 6.948284769516627e-05, | |
| "loss": 2.4131, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 72.9, | |
| "eval_accuracy": 0.5098879891877023, | |
| "eval_loss": 2.7781522274017334, | |
| "eval_runtime": 146.0156, | |
| "eval_samples_per_second": 164.743, | |
| "eval_steps_per_second": 5.15, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 73.12, | |
| "learning_rate": 6.848342268987511e-05, | |
| "loss": 2.5661, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 73.33, | |
| "learning_rate": 6.748975064573007e-05, | |
| "loss": 2.3994, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 73.53, | |
| "learning_rate": 6.650187502916552e-05, | |
| "loss": 2.4078, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 73.74, | |
| "learning_rate": 6.551983905306107e-05, | |
| "loss": 2.4168, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 73.95, | |
| "learning_rate": 6.454368567485183e-05, | |
| "loss": 2.4016, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 73.95, | |
| "eval_accuracy": 0.5097699735946659, | |
| "eval_loss": 2.772381544113159, | |
| "eval_runtime": 146.0586, | |
| "eval_samples_per_second": 164.694, | |
| "eval_steps_per_second": 5.149, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 74.16, | |
| "learning_rate": 6.35734575946487e-05, | |
| "loss": 2.5732, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 74.37, | |
| "learning_rate": 6.260919725337109e-05, | |
| "loss": 2.3961, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 74.58, | |
| "learning_rate": 6.165094683089015e-05, | |
| "loss": 2.4073, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 74.78, | |
| "learning_rate": 6.069874824418356e-05, | |
| "loss": 2.3997, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 74.99, | |
| "learning_rate": 5.975264314550229e-05, | |
| "loss": 2.3998, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 74.99, | |
| "eval_accuracy": 0.5110515365958426, | |
| "eval_loss": 2.7658748626708984, | |
| "eval_runtime": 146.0874, | |
| "eval_samples_per_second": 164.662, | |
| "eval_steps_per_second": 5.148, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 75.21, | |
| "learning_rate": 5.881267292054828e-05, | |
| "loss": 2.5492, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 75.41, | |
| "learning_rate": 5.787887868666417e-05, | |
| "loss": 2.3838, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 75.62, | |
| "learning_rate": 5.6951301291034945e-05, | |
| "loss": 2.398, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 75.82, | |
| "learning_rate": 5.602998130890065e-05, | |
| "loss": 2.4025, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 76.04, | |
| "learning_rate": 5.511495904178221e-05, | |
| "loss": 2.5475, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 76.04, | |
| "eval_accuracy": 0.510823536539714, | |
| "eval_loss": 2.7650203704833984, | |
| "eval_runtime": 146.0073, | |
| "eval_samples_per_second": 164.752, | |
| "eval_steps_per_second": 5.15, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 76.25, | |
| "learning_rate": 5.4206274515717736e-05, | |
| "loss": 2.4011, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 76.45, | |
| "learning_rate": 5.330396747951205e-05, | |
| "loss": 2.3818, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 76.66, | |
| "learning_rate": 5.240807740299811e-05, | |
| "loss": 2.3911, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 76.86, | |
| "learning_rate": 5.1518643475310034e-05, | |
| "loss": 2.389, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 77.08, | |
| "learning_rate": 5.0635704603169287e-05, | |
| "loss": 2.5443, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 77.08, | |
| "eval_accuracy": 0.5117344133064243, | |
| "eval_loss": 2.7620205879211426, | |
| "eval_runtime": 146.0022, | |
| "eval_samples_per_second": 164.758, | |
| "eval_steps_per_second": 5.151, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 77.29, | |
| "learning_rate": 4.975929940918236e-05, | |
| "loss": 2.38, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 77.49, | |
| "learning_rate": 4.8889466230151646e-05, | |
| "loss": 2.3758, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 77.7, | |
| "learning_rate": 4.8026243115398314e-05, | |
| "loss": 2.3744, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 77.9, | |
| "learning_rate": 4.7169667825097775e-05, | |
| "loss": 2.3784, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 78.12, | |
| "learning_rate": 4.631977782862824e-05, | |
| "loss": 2.5381, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 78.12, | |
| "eval_accuracy": 0.5115312635222847, | |
| "eval_loss": 2.76308274269104, | |
| "eval_runtime": 146.1953, | |
| "eval_samples_per_second": 164.54, | |
| "eval_steps_per_second": 5.144, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 78.33, | |
| "learning_rate": 4.547661030293129e-05, | |
| "loss": 2.3771, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 78.53, | |
| "learning_rate": 4.464020213088611e-05, | |
| "loss": 2.3786, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 78.74, | |
| "learning_rate": 4.381058989969564e-05, | |
| "loss": 2.3688, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 78.95, | |
| "learning_rate": 4.298780989928646e-05, | |
| "loss": 2.3792, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 79.16, | |
| "learning_rate": 4.217189812072131e-05, | |
| "loss": 2.5269, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 79.16, | |
| "eval_accuracy": 0.5122286175796967, | |
| "eval_loss": 2.7577943801879883, | |
| "eval_runtime": 146.122, | |
| "eval_samples_per_second": 164.623, | |
| "eval_steps_per_second": 5.146, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 79.37, | |
| "learning_rate": 4.136289025462443e-05, | |
| "loss": 2.3679, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 79.58, | |
| "learning_rate": 4.0560821689620856e-05, | |
| "loss": 2.3749, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 79.78, | |
| "learning_rate": 3.976572751078782e-05, | |
| "loss": 2.3605, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 79.99, | |
| "learning_rate": 3.8977642498120594e-05, | |
| "loss": 2.3747, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 80.21, | |
| "learning_rate": 3.819660112501053e-05, | |
| "loss": 2.5288, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 80.21, | |
| "eval_accuracy": 0.5124386898610601, | |
| "eval_loss": 2.754046678543091, | |
| "eval_runtime": 146.1606, | |
| "eval_samples_per_second": 164.579, | |
| "eval_steps_per_second": 5.145, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 80.41, | |
| "learning_rate": 3.742263755673758e-05, | |
| "loss": 2.367, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 80.62, | |
| "learning_rate": 3.6655785648975585e-05, | |
| "loss": 2.3667, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 80.82, | |
| "learning_rate": 3.589607894631111e-05, | |
| "loss": 2.3717, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 81.04, | |
| "learning_rate": 3.514355068077655e-05, | |
| "loss": 2.5195, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 81.25, | |
| "learning_rate": 3.439823377039599e-05, | |
| "loss": 2.3669, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 81.25, | |
| "eval_accuracy": 0.5124800918682825, | |
| "eval_loss": 2.752890110015869, | |
| "eval_runtime": 145.9521, | |
| "eval_samples_per_second": 164.814, | |
| "eval_steps_per_second": 5.152, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 81.45, | |
| "learning_rate": 3.36601608177457e-05, | |
| "loss": 2.3595, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 81.66, | |
| "learning_rate": 3.292936410852754e-05, | |
| "loss": 2.3727, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 81.86, | |
| "learning_rate": 3.220587561015709e-05, | |
| "loss": 2.3707, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 82.08, | |
| "learning_rate": 3.148972697036507e-05, | |
| "loss": 2.508, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 82.29, | |
| "learning_rate": 3.078094951581289e-05, | |
| "loss": 2.3631, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 82.29, | |
| "eval_accuracy": 0.5132219293707184, | |
| "eval_loss": 2.749772071838379, | |
| "eval_runtime": 146.0679, | |
| "eval_samples_per_second": 164.684, | |
| "eval_steps_per_second": 5.148, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 82.49, | |
| "learning_rate": 3.007957425072265e-05, | |
| "loss": 2.3568, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 82.7, | |
| "learning_rate": 2.9385631855520546e-05, | |
| "loss": 2.3679, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 82.9, | |
| "learning_rate": 2.8699152685494925e-05, | |
| "loss": 2.3504, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 83.12, | |
| "learning_rate": 2.8020166769468616e-05, | |
| "loss": 2.5054, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 83.33, | |
| "learning_rate": 2.7348703808485223e-05, | |
| "loss": 2.3499, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 83.33, | |
| "eval_accuracy": 0.5135577468816207, | |
| "eval_loss": 2.7453861236572266, | |
| "eval_runtime": 146.0782, | |
| "eval_samples_per_second": 164.672, | |
| "eval_steps_per_second": 5.148, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 83.53, | |
| "learning_rate": 2.6684793174509915e-05, | |
| "loss": 2.3478, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 83.74, | |
| "learning_rate": 2.6028463909144574e-05, | |
| "loss": 2.3686, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 83.95, | |
| "learning_rate": 2.5379744722357403e-05, | |
| "loss": 2.3636, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 84.16, | |
| "learning_rate": 2.473866399122733e-05, | |
| "loss": 2.5195, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 84.37, | |
| "learning_rate": 2.410524975870221e-05, | |
| "loss": 2.3726, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 84.37, | |
| "eval_accuracy": 0.5140964497348997, | |
| "eval_loss": 2.7446117401123047, | |
| "eval_runtime": 146.09, | |
| "eval_samples_per_second": 164.659, | |
| "eval_steps_per_second": 5.148, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 84.58, | |
| "learning_rate": 2.347952973237262e-05, | |
| "loss": 2.3504, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 84.78, | |
| "learning_rate": 2.286153128325954e-05, | |
| "loss": 2.351, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 84.99, | |
| "learning_rate": 2.2251281444617257e-05, | |
| "loss": 2.3506, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 85.21, | |
| "learning_rate": 2.1648806910750575e-05, | |
| "loss": 2.5104, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 85.41, | |
| "learning_rate": 2.1054134035847307e-05, | |
| "loss": 2.3411, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 85.41, | |
| "eval_accuracy": 0.5143741932133077, | |
| "eval_loss": 2.740255355834961, | |
| "eval_runtime": 146.0438, | |
| "eval_samples_per_second": 164.711, | |
| "eval_steps_per_second": 5.149, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 85.62, | |
| "learning_rate": 2.0467288832825583e-05, | |
| "loss": 2.3666, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 85.82, | |
| "learning_rate": 1.9888296972195587e-05, | |
| "loss": 2.3451, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 86.04, | |
| "learning_rate": 1.931718378093703e-05, | |
| "loss": 2.5151, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 86.25, | |
| "learning_rate": 1.875397424139109e-05, | |
| "loss": 2.3539, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 86.45, | |
| "learning_rate": 1.81986929901675e-05, | |
| "loss": 2.3321, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 86.45, | |
| "eval_accuracy": 0.5146461086764289, | |
| "eval_loss": 2.7371606826782227, | |
| "eval_runtime": 146.1164, | |
| "eval_samples_per_second": 164.629, | |
| "eval_steps_per_second": 5.147, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 86.66, | |
| "learning_rate": 1.765136431706711e-05, | |
| "loss": 2.3573, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 86.86, | |
| "learning_rate": 1.711201216401912e-05, | |
| "loss": 2.3422, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 87.08, | |
| "learning_rate": 1.6580660124034032e-05, | |
| "loss": 2.5055, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 87.29, | |
| "learning_rate": 1.605733144017132e-05, | |
| "loss": 2.3429, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 87.49, | |
| "learning_rate": 1.5542049004523053e-05, | |
| "loss": 2.3456, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 87.49, | |
| "eval_accuracy": 0.5146212850149416, | |
| "eval_loss": 2.7389299869537354, | |
| "eval_runtime": 146.0012, | |
| "eval_samples_per_second": 164.759, | |
| "eval_steps_per_second": 5.151, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 87.7, | |
| "learning_rate": 1.503483535721224e-05, | |
| "loss": 2.3608, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 87.9, | |
| "learning_rate": 1.4535712685406921e-05, | |
| "loss": 2.3466, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 88.12, | |
| "learning_rate": 1.4044702822349731e-05, | |
| "loss": 2.4892, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 88.33, | |
| "learning_rate": 1.3561827246402692e-05, | |
| "loss": 2.3418, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 88.53, | |
| "learning_rate": 1.3087107080107853e-05, | |
| "loss": 2.3372, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 88.53, | |
| "eval_accuracy": 0.515111201963272, | |
| "eval_loss": 2.7384002208709717, | |
| "eval_runtime": 145.8226, | |
| "eval_samples_per_second": 164.961, | |
| "eval_steps_per_second": 5.157, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 88.74, | |
| "learning_rate": 1.2620563089263093e-05, | |
| "loss": 2.3411, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 88.95, | |
| "learning_rate": 1.2162215682014012e-05, | |
| "loss": 2.3637, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 89.16, | |
| "learning_rate": 1.1712084907961053e-05, | |
| "loss": 2.4971, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 89.37, | |
| "learning_rate": 1.127019045728246e-05, | |
| "loss": 2.3476, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 89.58, | |
| "learning_rate": 1.0836551659873074e-05, | |
| "loss": 2.343, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 89.58, | |
| "eval_accuracy": 0.5144067649722459, | |
| "eval_loss": 2.7397918701171875, | |
| "eval_runtime": 146.0005, | |
| "eval_samples_per_second": 164.76, | |
| "eval_steps_per_second": 5.151, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 89.58, | |
| "step": 4300, | |
| "total_flos": 2.954083328682332e+17, | |
| "train_loss": 3.7431876293448516, | |
| "train_runtime": 42244.2763, | |
| "train_samples_per_second": 58.967, | |
| "train_steps_per_second": 0.114 | |
| } | |
| ], | |
| "max_steps": 4800, | |
| "num_train_epochs": 100, | |
| "total_flos": 2.954083328682332e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |