| { | |
| "best_metric": 0.3075002644877919, | |
| "best_model_checkpoint": "ru_t5_logs/checkpoint-7200", | |
| "epoch": 1.21786719081204, | |
| "global_step": 7900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.555555555555555e-05, | |
| "loss": 8.8426, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0001111111111111111, | |
| "loss": 2.5171, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00016666666666666666, | |
| "loss": 1.4573, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002222222222222222, | |
| "loss": 1.1602, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002777777777777778, | |
| "loss": 0.9913, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0003333333333333333, | |
| "loss": 1.0849, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0003888888888888889, | |
| "loss": 1.1257, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0004444444444444444, | |
| "loss": 1.1418, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0005, | |
| "loss": 1.0913, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0004999996711179958, | |
| "loss": 1.147, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_bleu": 0.057150905355919934, | |
| "eval_loss": 0.9198915958404541, | |
| "eval_meteor": 0.11030404785434701, | |
| "eval_rouge1": 0.279545903055327, | |
| "eval_rouge2": 0.1375545207790398, | |
| "eval_rougeL": 0.22993057523222138, | |
| "eval_rougeLsum": 0.23002986644843249, | |
| "eval_runtime": 1292.7346, | |
| "eval_samples_per_second": 1.128, | |
| "eval_steps_per_second": 0.188, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0004999986844728487, | |
| "loss": 1.067, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0004999970400671544, | |
| "loss": 1.1341, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0004999947379052395, | |
| "loss": 0.9852, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0004999917779931613, | |
| "loss": 1.0095, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0004999881603387073, | |
| "loss": 1.0118, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0004999838849513957, | |
| "loss": 1.0513, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0004999789518424755, | |
| "loss": 1.009, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0004999733610249258, | |
| "loss": 0.9138, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0004999671125134564, | |
| "loss": 0.9989, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0004999602063245074, | |
| "loss": 0.9443, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_bleu": 0.08174242542801959, | |
| "eval_loss": 0.8470357656478882, | |
| "eval_meteor": 0.13334461127658528, | |
| "eval_rouge1": 0.3118630390437005, | |
| "eval_rouge2": 0.17036843477123215, | |
| "eval_rougeL": 0.26155329349275835, | |
| "eval_rougeLsum": 0.26141341054848355, | |
| "eval_runtime": 1098.495, | |
| "eval_samples_per_second": 1.327, | |
| "eval_steps_per_second": 0.221, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0004999526424762496, | |
| "loss": 0.9839, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0004999444209885838, | |
| "loss": 0.9479, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004999355418831412, | |
| "loss": 0.917, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004999260051832831, | |
| "loss": 0.87, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004999158109141012, | |
| "loss": 0.9722, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004999049591024172, | |
| "loss": 0.9619, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004998934497767828, | |
| "loss": 1.0447, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004998812829674797, | |
| "loss": 0.8547, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0004998684587065195, | |
| "loss": 0.8972, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0004998549770276435, | |
| "loss": 0.8871, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_bleu": 0.046252384714512554, | |
| "eval_loss": 0.8526527881622314, | |
| "eval_meteor": 0.09538529454387626, | |
| "eval_rouge1": 0.27543511716574076, | |
| "eval_rouge2": 0.1605961208091074, | |
| "eval_rougeL": 0.24354530581569278, | |
| "eval_rougeLsum": 0.24344913233941234, | |
| "eval_runtime": 882.6979, | |
| "eval_samples_per_second": 1.652, | |
| "eval_steps_per_second": 0.275, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0004998408379663226, | |
| "loss": 0.8772, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0004998260415597578, | |
| "loss": 1.0016, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.000499810587846879, | |
| "loss": 0.9394, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0004997944768683458, | |
| "loss": 0.8853, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0004997777086665473, | |
| "loss": 0.9091, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0004997602832856013, | |
| "loss": 0.8932, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0004997422007713551, | |
| "loss": 0.8659, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0004997234611713849, | |
| "loss": 0.9325, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0004997040645349955, | |
| "loss": 0.9674, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0004996840109132205, | |
| "loss": 0.9608, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_bleu": 0.08586731335447921, | |
| "eval_loss": 0.8303579092025757, | |
| "eval_meteor": 0.1430676871634322, | |
| "eval_rouge1": 0.31709190550852295, | |
| "eval_rouge2": 0.17041273297322634, | |
| "eval_rougeL": 0.267869089287303, | |
| "eval_rougeLsum": 0.2677306207885424, | |
| "eval_runtime": 1072.7177, | |
| "eval_samples_per_second": 1.359, | |
| "eval_steps_per_second": 0.227, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0004996633003588222, | |
| "loss": 0.9616, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0004996419329262913, | |
| "loss": 0.9675, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0004996199086718466, | |
| "loss": 0.9418, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0004995972276534351, | |
| "loss": 0.9631, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0004995738899307319, | |
| "loss": 0.9218, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0004995498955651399, | |
| "loss": 0.984, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0004995252446197894, | |
| "loss": 0.9286, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0004994999371595388, | |
| "loss": 0.8722, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0004994739732509731, | |
| "loss": 0.8834, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000499447352962405, | |
| "loss": 0.953, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_bleu": 0.09861710848144628, | |
| "eval_loss": 0.8263402581214905, | |
| "eval_meteor": 0.15312912847569377, | |
| "eval_rouge1": 0.3263458627968157, | |
| "eval_rouge2": 0.17504244009075126, | |
| "eval_rougeL": 0.2714073081979089, | |
| "eval_rougeLsum": 0.27118688987783984, | |
| "eval_runtime": 1146.0503, | |
| "eval_samples_per_second": 1.272, | |
| "eval_steps_per_second": 0.212, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0004994200763638739, | |
| "loss": 0.8363, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000499392143527146, | |
| "loss": 0.9203, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0004993635545257144, | |
| "loss": 0.9375, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0004993343094347979, | |
| "loss": 0.9548, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0004993044083313425, | |
| "loss": 0.9221, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0004992738512940194, | |
| "loss": 0.8666, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0004992426384032258, | |
| "loss": 0.8917, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0004992107697410848, | |
| "loss": 0.9237, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0004991782453914444, | |
| "loss": 0.8657, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0004991450654398782, | |
| "loss": 0.9489, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_bleu": 0.10113643092041803, | |
| "eval_loss": 0.8359497785568237, | |
| "eval_meteor": 0.1654018684728887, | |
| "eval_rouge1": 0.33698613363555824, | |
| "eval_rouge2": 0.17694734353424882, | |
| "eval_rougeL": 0.2749185603408262, | |
| "eval_rougeLsum": 0.2750113209557301, | |
| "eval_runtime": 1213.3811, | |
| "eval_samples_per_second": 1.202, | |
| "eval_steps_per_second": 0.2, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0004991112299736844, | |
| "loss": 0.9289, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000499076739081886, | |
| "loss": 0.9123, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0004990415928552305, | |
| "loss": 0.9329, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0004990057913861896, | |
| "loss": 0.8874, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0004989693347689589, | |
| "loss": 0.9698, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000498932223099458, | |
| "loss": 0.831, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0004988944564753295, | |
| "loss": 0.8701, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0004988560349959396, | |
| "loss": 0.8953, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0004988169587623776, | |
| "loss": 0.9621, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0004987772278774548, | |
| "loss": 0.8948, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_bleu": 0.04255087666728896, | |
| "eval_loss": 0.8361812233924866, | |
| "eval_meteor": 0.099997885358938, | |
| "eval_rouge1": 0.28420928207646434, | |
| "eval_rouge2": 0.1653214913198871, | |
| "eval_rougeL": 0.25267504887077563, | |
| "eval_rougeLsum": 0.25262418214939675, | |
| "eval_runtime": 754.6649, | |
| "eval_samples_per_second": 1.932, | |
| "eval_steps_per_second": 0.322, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0004987368424457058, | |
| "loss": 0.9126, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0004986958025733868, | |
| "loss": 0.9425, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.000498654108368476, | |
| "loss": 0.8944, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0004986117599406733, | |
| "loss": 0.8977, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0004985687574013994, | |
| "loss": 0.8851, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0004985251008637968, | |
| "loss": 0.8542, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0004984807904427281, | |
| "loss": 0.8809, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0004984358262547766, | |
| "loss": 0.8334, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0004983902084182455, | |
| "loss": 0.8488, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000498343937053158, | |
| "loss": 0.8409, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_bleu": 0.09329201888155486, | |
| "eval_loss": 0.8163634538650513, | |
| "eval_meteor": 0.15770696776351786, | |
| "eval_rouge1": 0.3315883123187934, | |
| "eval_rouge2": 0.1767418040045381, | |
| "eval_rougeL": 0.27737567013849995, | |
| "eval_rougeLsum": 0.27738229938080694, | |
| "eval_runtime": 1063.2636, | |
| "eval_samples_per_second": 1.371, | |
| "eval_steps_per_second": 0.229, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0004982970122812566, | |
| "loss": 0.7996, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0004982494342260029, | |
| "loss": 0.8564, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0004982012030125775, | |
| "loss": 0.9214, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0004981523187678796, | |
| "loss": 0.8301, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0004981027816205262, | |
| "loss": 0.8948, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0004980525917008523, | |
| "loss": 0.8467, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0004980017491409103, | |
| "loss": 0.9733, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0004979502540744702, | |
| "loss": 0.9432, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.000497898106637018, | |
| "loss": 0.9256, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0004978453069657568, | |
| "loss": 0.8505, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_bleu": 0.08288033858994784, | |
| "eval_loss": 0.8515655398368835, | |
| "eval_meteor": 0.13825343548644084, | |
| "eval_rouge1": 0.31117223601763094, | |
| "eval_rouge2": 0.17049205940823398, | |
| "eval_rougeL": 0.26282109139798354, | |
| "eval_rougeLsum": 0.26267587400999515, | |
| "eval_runtime": 1043.0682, | |
| "eval_samples_per_second": 1.398, | |
| "eval_steps_per_second": 0.233, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0004977918551996054, | |
| "loss": 0.8471, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0004977377514791983, | |
| "loss": 0.9285, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0004976829959468855, | |
| "loss": 0.8993, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0004976275887467319, | |
| "loss": 0.9278, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0004975715300245169, | |
| "loss": 0.8716, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0004975148199277342, | |
| "loss": 0.8836, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0004974574586055912, | |
| "loss": 0.9705, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0004973994462090088, | |
| "loss": 0.8766, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0004973407828906207, | |
| "loss": 0.8644, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0004972814688047736, | |
| "loss": 0.8516, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_bleu": 0.11086631103897832, | |
| "eval_loss": 0.8234091401100159, | |
| "eval_meteor": 0.1702736355204243, | |
| "eval_rouge1": 0.33339211687613324, | |
| "eval_rouge2": 0.17689852127575206, | |
| "eval_rougeL": 0.2749015362723488, | |
| "eval_rougeLsum": 0.2749567892668668, | |
| "eval_runtime": 1362.5888, | |
| "eval_samples_per_second": 1.07, | |
| "eval_steps_per_second": 0.178, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004972215041075261, | |
| "loss": 0.9137, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004971608889566486, | |
| "loss": 0.9558, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004970996235116231, | |
| "loss": 0.9403, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004970377079336422, | |
| "loss": 0.9096, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004969751423856095, | |
| "loss": 0.9792, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004969119270321383, | |
| "loss": 0.8368, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0004968480620395519, | |
| "loss": 0.9117, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0004967835475758825, | |
| "loss": 0.91, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0004967183838108713, | |
| "loss": 0.897, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0004966525709159679, | |
| "loss": 0.876, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_bleu": 0.08012098475489943, | |
| "eval_loss": 0.810808539390564, | |
| "eval_meteor": 0.13737464649721876, | |
| "eval_rouge1": 0.31919458917681476, | |
| "eval_rouge2": 0.17969717787411127, | |
| "eval_rougeL": 0.2725775781741208, | |
| "eval_rougeLsum": 0.27236890457104335, | |
| "eval_runtime": 942.8581, | |
| "eval_samples_per_second": 1.546, | |
| "eval_steps_per_second": 0.258, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0004965861090643296, | |
| "loss": 0.9041, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0004965189984308215, | |
| "loss": 0.9139, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0004964512391920151, | |
| "loss": 0.8988, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.000496382831526189, | |
| "loss": 0.8624, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0004963137756133274, | |
| "loss": 0.8518, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0004962440716351205, | |
| "loss": 0.9235, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0004961737197749633, | |
| "loss": 0.8071, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0004961027202179554, | |
| "loss": 0.894, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0004960310731509007, | |
| "loss": 0.9255, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0004959587787623065, | |
| "loss": 0.8109, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_bleu": 0.09517651862521773, | |
| "eval_loss": 0.8174175024032593, | |
| "eval_meteor": 0.16180826590180308, | |
| "eval_rouge1": 0.33582745913845957, | |
| "eval_rouge2": 0.1811654909375417, | |
| "eval_rougeL": 0.281565966212259, | |
| "eval_rougeLsum": 0.2813838288792657, | |
| "eval_runtime": 1002.5623, | |
| "eval_samples_per_second": 1.454, | |
| "eval_steps_per_second": 0.242, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0004958858372423837, | |
| "loss": 0.8624, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.000495812248783045, | |
| "loss": 0.8696, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0004957380135779064, | |
| "loss": 0.859, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0004956631318222846, | |
| "loss": 0.8144, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0004955876037131974, | |
| "loss": 0.87, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0004955114294493639, | |
| "loss": 0.8819, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0004954346092312026, | |
| "loss": 0.8935, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0004953571432608321, | |
| "loss": 0.8716, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0004952790317420694, | |
| "loss": 0.8911, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0004952002748804304, | |
| "loss": 0.8183, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_bleu": 0.10596849766835054, | |
| "eval_loss": 0.8043612837791443, | |
| "eval_meteor": 0.1691677125903321, | |
| "eval_rouge1": 0.3389925011791456, | |
| "eval_rouge2": 0.18348093472072707, | |
| "eval_rougeL": 0.28213366266002454, | |
| "eval_rougeLsum": 0.28213090498762367, | |
| "eval_runtime": 1141.1899, | |
| "eval_samples_per_second": 1.278, | |
| "eval_steps_per_second": 0.213, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0004951208728831289, | |
| "loss": 0.8433, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0004950408259590757, | |
| "loss": 0.8268, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0004949601343188792, | |
| "loss": 0.911, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0004948787981748433, | |
| "loss": 0.8638, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0004947968177409681, | |
| "loss": 0.8691, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0004947141932329486, | |
| "loss": 0.8499, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0004946309248681745, | |
| "loss": 0.8832, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0004945470128657297, | |
| "loss": 0.8744, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0004944624574463913, | |
| "loss": 0.8219, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0004943772588326292, | |
| "loss": 0.8407, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_bleu": 0.11433808067218891, | |
| "eval_loss": 0.8030869960784912, | |
| "eval_meteor": 0.17630048064575726, | |
| "eval_rouge1": 0.34423855201642795, | |
| "eval_rouge2": 0.18361911351356364, | |
| "eval_rougeL": 0.2837522876067602, | |
| "eval_rougeLsum": 0.28364725151298015, | |
| "eval_runtime": 1232.5857, | |
| "eval_samples_per_second": 1.183, | |
| "eval_steps_per_second": 0.197, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0004942914172486059, | |
| "loss": 0.8932, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0004942049329201754, | |
| "loss": 0.8544, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0004941178060748829, | |
| "loss": 0.7995, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0004940300369419637, | |
| "loss": 0.8576, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0004939416257523436, | |
| "loss": 0.8654, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0004938525727386373, | |
| "loss": 0.8559, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0004937628781351483, | |
| "loss": 0.8738, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.000493672542177868, | |
| "loss": 0.9433, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0004935815651044755, | |
| "loss": 0.8125, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0004934899471543366, | |
| "loss": 0.886, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_bleu": 0.12742687031814315, | |
| "eval_loss": 0.805793821811676, | |
| "eval_meteor": 0.19296014590447547, | |
| "eval_rouge1": 0.35221171953589286, | |
| "eval_rouge2": 0.1846768871884069, | |
| "eval_rougeL": 0.2848280618153002, | |
| "eval_rougeLsum": 0.28493716966830007, | |
| "eval_runtime": 1452.5375, | |
| "eval_samples_per_second": 1.004, | |
| "eval_steps_per_second": 0.167, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0004933976885685031, | |
| "loss": 0.8315, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0004933047895897127, | |
| "loss": 0.8242, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004932112504623876, | |
| "loss": 0.8638, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004931170714326347, | |
| "loss": 0.831, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004930222527482442, | |
| "loss": 0.8754, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004929267946586894, | |
| "loss": 0.817, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004928306974151258, | |
| "loss": 0.906, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0004927339612703908, | |
| "loss": 0.8899, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0004926365864790025, | |
| "loss": 0.8845, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0004925385732971595, | |
| "loss": 0.8761, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_bleu": 0.10770304213160696, | |
| "eval_loss": 0.8112803101539612, | |
| "eval_meteor": 0.17046581268631894, | |
| "eval_rouge1": 0.3402358184654584, | |
| "eval_rouge2": 0.1827631633093852, | |
| "eval_rougeL": 0.28122066588095507, | |
| "eval_rougeLsum": 0.28114478510696167, | |
| "eval_runtime": 1067.7239, | |
| "eval_samples_per_second": 1.366, | |
| "eval_steps_per_second": 0.228, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0004924399219827398, | |
| "loss": 0.9299, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0004923406327953007, | |
| "loss": 0.8943, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0004922407059960776, | |
| "loss": 0.9165, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0004921401418479834, | |
| "loss": 0.8499, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0004920389406156082, | |
| "loss": 0.8766, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.000491937102565218, | |
| "loss": 0.8453, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0004918346279647544, | |
| "loss": 0.8943, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0004917315170838339, | |
| "loss": 0.8835, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0004916277701937468, | |
| "loss": 0.8386, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0004915233875674572, | |
| "loss": 0.872, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_bleu": 0.11788189676776079, | |
| "eval_loss": 0.802962601184845, | |
| "eval_meteor": 0.18209545205592362, | |
| "eval_rouge1": 0.3438678638236826, | |
| "eval_rouge2": 0.18232469842121968, | |
| "eval_rougeL": 0.28305359100970484, | |
| "eval_rougeLsum": 0.2829156060905057, | |
| "eval_runtime": 1203.8235, | |
| "eval_samples_per_second": 1.211, | |
| "eval_steps_per_second": 0.202, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0004914183694796016, | |
| "loss": 0.8319, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004913127162064885, | |
| "loss": 0.8564, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004912064280260974, | |
| "loss": 0.8453, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004910995052180786, | |
| "loss": 0.8255, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004909919480637519, | |
| "loss": 0.8831, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004908837568461064, | |
| "loss": 0.8273, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004907749318497991, | |
| "loss": 0.8494, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0004906654733611547, | |
| "loss": 0.8869, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0004905553816681646, | |
| "loss": 0.778, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0004904446570604862, | |
| "loss": 0.9082, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_bleu": 0.11523676534243875, | |
| "eval_loss": 0.8040370345115662, | |
| "eval_meteor": 0.17806856469850704, | |
| "eval_rouge1": 0.34321948648974265, | |
| "eval_rouge2": 0.18492135732773402, | |
| "eval_rougeL": 0.2827187054731721, | |
| "eval_rougeLsum": 0.2825956342144159, | |
| "eval_runtime": 1223.1433, | |
| "eval_samples_per_second": 1.192, | |
| "eval_steps_per_second": 0.199, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0004903332998294422, | |
| "loss": 0.8266, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0004902213102680197, | |
| "loss": 0.85, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0004901086886708695, | |
| "loss": 0.8447, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0004899954353343053, | |
| "loss": 0.8803, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.000489881550556303, | |
| "loss": 0.7804, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0004897670346364998, | |
| "loss": 0.9184, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0004896518878761937, | |
| "loss": 0.8075, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.000489536110578342, | |
| "loss": 0.8086, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0004894197030475614, | |
| "loss": 0.8357, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0004893026655901266, | |
| "loss": 0.818, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_bleu": 0.1152261234460638, | |
| "eval_loss": 0.7954283356666565, | |
| "eval_meteor": 0.18005815233416442, | |
| "eval_rouge1": 0.3480841484526223, | |
| "eval_rouge2": 0.1878809645185771, | |
| "eval_rougeL": 0.2866495022876356, | |
| "eval_rougeLsum": 0.2865319164852567, | |
| "eval_runtime": 1150.8093, | |
| "eval_samples_per_second": 1.267, | |
| "eval_steps_per_second": 0.211, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0004891849985139697, | |
| "loss": 0.919, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0004890667021286794, | |
| "loss": 0.8702, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0004889477767455002, | |
| "loss": 0.8223, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0004888282226773313, | |
| "loss": 0.8881, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0004887080402387262, | |
| "loss": 0.8326, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0004885872297458915, | |
| "loss": 0.7896, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0004884657915166867, | |
| "loss": 0.8202, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0004883437258706224, | |
| "loss": 0.8457, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0004882210331288601, | |
| "loss": 0.8648, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0004880977136142113, | |
| "loss": 0.7916, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_bleu": 0.102031392794918, | |
| "eval_loss": 0.7973849773406982, | |
| "eval_meteor": 0.1697416518292726, | |
| "eval_rouge1": 0.3407404763360161, | |
| "eval_rouge2": 0.18420618651745257, | |
| "eval_rougeL": 0.2837111287248353, | |
| "eval_rougeLsum": 0.28358533147000164, | |
| "eval_runtime": 1060.7578, | |
| "eval_samples_per_second": 1.374, | |
| "eval_steps_per_second": 0.229, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00048797376765113667, | |
| "loss": 0.8344, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0004878491955657448, | |
| "loss": 0.8599, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.000487723997685792, | |
| "loss": 0.8316, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00048759817434068084, | |
| "loss": 0.7967, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00048747172586145954, | |
| "loss": 0.8272, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0004873446525808212, | |
| "loss": 0.8879, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0004872169548331027, | |
| "loss": 0.8938, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0004870886329542841, | |
| "loss": 0.9446, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00048695968728198726, | |
| "loss": 0.8329, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00048683011815547553, | |
| "loss": 0.8701, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_bleu": 0.10951609270697767, | |
| "eval_loss": 0.7971030473709106, | |
| "eval_meteor": 0.1724449245874015, | |
| "eval_rouge1": 0.34523527799298537, | |
| "eval_rouge2": 0.18728792305872893, | |
| "eval_rougeL": 0.2875415995221111, | |
| "eval_rougeLsum": 0.2878305472355617, | |
| "eval_runtime": 1081.3597, | |
| "eval_samples_per_second": 1.348, | |
| "eval_steps_per_second": 0.225, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0004866999259156526, | |
| "loss": 0.856, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0004865691109050615, | |
| "loss": 0.8573, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0004864376734678839, | |
| "loss": 0.813, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.000486305613949939, | |
| "loss": 0.9175, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00048617293269868277, | |
| "loss": 0.8486, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0004860396300632072, | |
| "loss": 0.8752, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0004859057063942387, | |
| "loss": 0.8291, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00048577116204413817, | |
| "loss": 0.8263, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00048563599736689935, | |
| "loss": 0.8634, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00048550021271814793, | |
| "loss": 0.813, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_bleu": 0.11357786165382955, | |
| "eval_loss": 0.7968371510505676, | |
| "eval_meteor": 0.17812534934300692, | |
| "eval_rouge1": 0.34547989314882344, | |
| "eval_rouge2": 0.18647144671781885, | |
| "eval_rougeL": 0.28578813253772484, | |
| "eval_rougeLsum": 0.28593828002638405, | |
| "eval_runtime": 1224.0946, | |
| "eval_samples_per_second": 1.191, | |
| "eval_steps_per_second": 0.199, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.000485363808455141, | |
| "loss": 0.8431, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0004852267849367659, | |
| "loss": 0.9313, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.000485089142523539, | |
| "loss": 0.8619, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00048495088157760535, | |
| "loss": 0.8258, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00048481200246273715, | |
| "loss": 0.8584, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0004846725055443333, | |
| "loss": 0.8279, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0004845323911894178, | |
| "loss": 0.847, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00048439165976663947, | |
| "loss": 0.837, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00048425031164627056, | |
| "loss": 0.8753, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0004841083472002059, | |
| "loss": 0.8525, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_bleu": 0.11550228990610252, | |
| "eval_loss": 0.7897738218307495, | |
| "eval_meteor": 0.17967220096769168, | |
| "eval_rouge1": 0.34880707864872973, | |
| "eval_rouge2": 0.18938272334418443, | |
| "eval_rougeL": 0.28769647914513974, | |
| "eval_rougeLsum": 0.2877285709755628, | |
| "eval_runtime": 1181.2131, | |
| "eval_samples_per_second": 1.234, | |
| "eval_steps_per_second": 0.206, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0004839657668019619, | |
| "loss": 0.831, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00048382257082667566, | |
| "loss": 0.8446, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00048367875965110366, | |
| "loss": 0.8041, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0004835343336536212, | |
| "loss": 0.8464, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0004833892932142213, | |
| "loss": 0.8198, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00048324363871451325, | |
| "loss": 0.9116, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00048309737053772245, | |
| "loss": 0.8471, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00048295048906868854, | |
| "loss": 0.8676, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00048280299469386493, | |
| "loss": 0.8034, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0004826548878013175, | |
| "loss": 0.8494, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_bleu": 0.11484040536071449, | |
| "eval_loss": 0.7928580641746521, | |
| "eval_meteor": 0.17927183932418322, | |
| "eval_rouge1": 0.353015756939868, | |
| "eval_rouge2": 0.1937422671781578, | |
| "eval_rougeL": 0.29412424979719143, | |
| "eval_rougeLsum": 0.29416263018885086, | |
| "eval_runtime": 1151.8853, | |
| "eval_samples_per_second": 1.266, | |
| "eval_steps_per_second": 0.211, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00048250616878072383, | |
| "loss": 0.8746, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0004823568380233721, | |
| "loss": 0.8028, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0004822068959221598, | |
| "loss": 0.9032, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.000482056342871593, | |
| "loss": 0.8321, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0004819051792677852, | |
| "loss": 0.854, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00048175340550845637, | |
| "loss": 0.9002, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00048160102199293174, | |
| "loss": 0.8416, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00048144802912214094, | |
| "loss": 0.879, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00048129442729861663, | |
| "loss": 0.8377, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00048114021692649404, | |
| "loss": 0.8439, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_bleu": 0.1110213869118398, | |
| "eval_loss": 0.7995119094848633, | |
| "eval_meteor": 0.17509915967969378, | |
| "eval_rouge1": 0.34284195131985784, | |
| "eval_rouge2": 0.1868890431147761, | |
| "eval_rougeL": 0.2857996409683133, | |
| "eval_rougeLsum": 0.28592281635680744, | |
| "eval_runtime": 1147.2498, | |
| "eval_samples_per_second": 1.271, | |
| "eval_steps_per_second": 0.212, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.000480985398411509, | |
| "loss": 0.8648, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00048082997216099797, | |
| "loss": 0.8771, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0004806739385838961, | |
| "loss": 0.8275, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0004805172980907363, | |
| "loss": 0.8215, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00048036005109364856, | |
| "loss": 0.8678, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0004802021980063586, | |
| "loss": 0.8408, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00048004373924418674, | |
| "loss": 0.8536, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0004798846752240468, | |
| "loss": 0.8302, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0004797250063644452, | |
| "loss": 0.8429, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0004795647330854795, | |
| "loss": 0.8562, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_bleu": 0.10131964675585854, | |
| "eval_loss": 0.7919116616249084, | |
| "eval_meteor": 0.1612163387444336, | |
| "eval_rouge1": 0.3392932133690917, | |
| "eval_rouge2": 0.18896904080765833, | |
| "eval_rougeL": 0.2874520136930931, | |
| "eval_rougeLsum": 0.287545512675921, | |
| "eval_runtime": 998.4776, | |
| "eval_samples_per_second": 1.46, | |
| "eval_steps_per_second": 0.243, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00047940385580883785, | |
| "loss": 0.8855, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00047924237495779734, | |
| "loss": 0.845, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00047908029095722305, | |
| "loss": 0.8403, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00047891760423356724, | |
| "loss": 0.8222, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00047875431521486757, | |
| "loss": 0.8677, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0004785904243307468, | |
| "loss": 0.8145, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0004784259320124109, | |
| "loss": 0.8303, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00047826083869264847, | |
| "loss": 0.8224, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00047809514480582916, | |
| "loss": 0.816, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00047792885078790304, | |
| "loss": 0.7636, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_bleu": 0.10954714255898276, | |
| "eval_loss": 0.7920675277709961, | |
| "eval_meteor": 0.17267822313892012, | |
| "eval_rouge1": 0.34506677355407445, | |
| "eval_rouge2": 0.18817280144902515, | |
| "eval_rougeL": 0.2890166522888482, | |
| "eval_rougeLsum": 0.2890120291145137, | |
| "eval_runtime": 1093.769, | |
| "eval_samples_per_second": 1.333, | |
| "eval_steps_per_second": 0.222, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0004777619570763988, | |
| "loss": 0.8926, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00047759446411042335, | |
| "loss": 0.934, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0004774263723306599, | |
| "loss": 0.8923, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0004772576821793674, | |
| "loss": 0.8999, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00047708839410037914, | |
| "loss": 0.8344, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00047691850853910146, | |
| "loss": 0.8274, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0004767480259425128, | |
| "loss": 0.7697, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00047657694675916254, | |
| "loss": 0.8455, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00047640527143916943, | |
| "loss": 0.8216, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0004762330004342209, | |
| "loss": 0.8509, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_bleu": 0.11026373004464625, | |
| "eval_loss": 0.7961094379425049, | |
| "eval_meteor": 0.17107804416084108, | |
| "eval_rouge1": 0.34419235974247625, | |
| "eval_rouge2": 0.1879446304753386, | |
| "eval_rougeL": 0.2879382324644244, | |
| "eval_rougeLsum": 0.28804849261741966, | |
| "eval_runtime": 1114.3068, | |
| "eval_samples_per_second": 1.308, | |
| "eval_steps_per_second": 0.218, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0004760601341975718, | |
| "loss": 0.832, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0004758866731840426, | |
| "loss": 0.8718, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00047571261785001913, | |
| "loss": 0.8075, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0004755379686534507, | |
| "loss": 0.8044, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00047536272605384905, | |
| "loss": 0.8582, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00047518689051228734, | |
| "loss": 0.7933, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00047501046249139885, | |
| "loss": 0.8387, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0004748334424553754, | |
| "loss": 0.8743, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0004746558308699667, | |
| "loss": 0.8453, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00047447762820247876, | |
| "loss": 0.7834, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_bleu": 0.10144013679758371, | |
| "eval_loss": 0.7860347628593445, | |
| "eval_meteor": 0.16863044336346464, | |
| "eval_rouge1": 0.3483945653742756, | |
| "eval_rouge2": 0.1906512981948328, | |
| "eval_rougeL": 0.2928307931237276, | |
| "eval_rougeLsum": 0.292886082360032, | |
| "eval_runtime": 1008.318, | |
| "eval_samples_per_second": 1.446, | |
| "eval_steps_per_second": 0.241, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00047429883492177284, | |
| "loss": 0.8794, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00047411945149826397, | |
| "loss": 0.8534, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00047393947840392015, | |
| "loss": 0.8286, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0004737589161122605, | |
| "loss": 0.9363, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0004735777650983547, | |
| "loss": 0.8218, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00047339602583882105, | |
| "loss": 0.7604, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00047321369881182584, | |
| "loss": 0.9392, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0004730307844970817, | |
| "loss": 0.8586, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00047284728337584637, | |
| "loss": 0.8175, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00047266319593092167, | |
| "loss": 0.8156, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_bleu": 0.11205014855700891, | |
| "eval_loss": 0.7846682071685791, | |
| "eval_meteor": 0.1751311712892055, | |
| "eval_rouge1": 0.35172070697143076, | |
| "eval_rouge2": 0.1931095449214266, | |
| "eval_rougeL": 0.29234436972165456, | |
| "eval_rougeLsum": 0.29205838030588194, | |
| "eval_runtime": 1104.847, | |
| "eval_samples_per_second": 1.32, | |
| "eval_steps_per_second": 0.22, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00047247852264665184, | |
| "loss": 0.821, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0004722932640089228, | |
| "loss": 0.8089, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0004721074205051603, | |
| "loss": 0.8205, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.000471920992624329, | |
| "loss": 0.8564, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0004717339808569312, | |
| "loss": 0.8746, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00047154638569500527, | |
| "loss": 0.7676, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00047135820763212466, | |
| "loss": 0.7534, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0004711694471633963, | |
| "loss": 0.8063, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0004709801047854596, | |
| "loss": 0.8258, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00047079018099648495, | |
| "loss": 0.8006, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_bleu": 0.10544844410608596, | |
| "eval_loss": 0.779005765914917, | |
| "eval_meteor": 0.16928172352068147, | |
| "eval_rouge1": 0.34521811226055105, | |
| "eval_rouge2": 0.188414735386506, | |
| "eval_rougeL": 0.2883152848683356, | |
| "eval_rougeLsum": 0.28845113954285684, | |
| "eval_runtime": 1093.1651, | |
| "eval_samples_per_second": 1.334, | |
| "eval_steps_per_second": 0.222, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00047059967629617253, | |
| "loss": 0.8696, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00047040859118575087, | |
| "loss": 0.8342, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0004702169261679755, | |
| "loss": 0.8407, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.000470024681747128, | |
| "loss": 0.8277, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0004698318584290141, | |
| "loss": 0.861, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0004696384567209628, | |
| "loss": 0.8629, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00046944447713182473, | |
| "loss": 0.7462, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0004692499201719712, | |
| "loss": 0.8569, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0004690547863532924, | |
| "loss": 0.7479, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0004688590761891963, | |
| "loss": 0.7449, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_bleu": 0.11320699643880565, | |
| "eval_loss": 0.7808765769004822, | |
| "eval_meteor": 0.17517461553603783, | |
| "eval_rouge1": 0.34801418474464885, | |
| "eval_rouge2": 0.1926526264127003, | |
| "eval_rougeL": 0.2935871923613115, | |
| "eval_rougeLsum": 0.29356108231725353, | |
| "eval_runtime": 1147.8505, | |
| "eval_samples_per_second": 1.27, | |
| "eval_steps_per_second": 0.212, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0004686627901946074, | |
| "loss": 0.8372, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00046846592888596505, | |
| "loss": 0.8033, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0004682684927812225, | |
| "loss": 0.8315, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0004680704823998452, | |
| "loss": 0.7874, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0004678718982628094, | |
| "loss": 0.8616, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0004676727408926012, | |
| "loss": 0.8039, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0004674730108132148, | |
| "loss": 0.8342, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00046727270855015124, | |
| "loss": 0.8528, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.000467071834630417, | |
| "loss": 0.8195, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0004668703895825226, | |
| "loss": 0.8123, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_bleu": 0.12139040928235188, | |
| "eval_loss": 0.7802536487579346, | |
| "eval_meteor": 0.18649658941096503, | |
| "eval_rouge1": 0.35447872684757437, | |
| "eval_rouge2": 0.19104999366552095, | |
| "eval_rougeL": 0.29335305776164255, | |
| "eval_rougeLsum": 0.29324750486448675, | |
| "eval_runtime": 1188.3453, | |
| "eval_samples_per_second": 1.227, | |
| "eval_steps_per_second": 0.204, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0004666683739364812, | |
| "loss": 0.8181, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0004664657882238074, | |
| "loss": 0.8842, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00046626263297751546, | |
| "loss": 0.8528, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0004660589087321183, | |
| "loss": 0.7764, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0004658546160236257, | |
| "loss": 0.8313, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00046564975538954334, | |
| "loss": 0.8438, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00046544432736887097, | |
| "loss": 0.8519, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00046523833250210135, | |
| "loss": 0.809, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0004650317713312183, | |
| "loss": 0.8335, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00046482464439969595, | |
| "loss": 0.8428, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_bleu": 0.1250886172570181, | |
| "eval_loss": 0.7814038991928101, | |
| "eval_meteor": 0.19055488234703907, | |
| "eval_rouge1": 0.3612439239393179, | |
| "eval_rouge2": 0.1958006760878207, | |
| "eval_rougeL": 0.2964134143411231, | |
| "eval_rougeLsum": 0.296116576418878, | |
| "eval_runtime": 1216.6365, | |
| "eval_samples_per_second": 1.198, | |
| "eval_steps_per_second": 0.2, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004646169522524969, | |
| "loss": 0.8177, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004644086954360708, | |
| "loss": 0.8482, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004641998744983529, | |
| "loss": 0.86, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004639904899887629, | |
| "loss": 0.8508, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004637805424582032, | |
| "loss": 0.9085, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004635700324590574, | |
| "loss": 0.8397, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0004633589605451892, | |
| "loss": 0.8734, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00046314732727194063, | |
| "loss": 0.7649, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00046293513319613065, | |
| "loss": 0.8471, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00046272237887605384, | |
| "loss": 0.832, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_bleu": 0.10486572244619506, | |
| "eval_loss": 0.7807884812355042, | |
| "eval_meteor": 0.16818865401790847, | |
| "eval_rouge1": 0.34705793027937726, | |
| "eval_rouge2": 0.19113964280370677, | |
| "eval_rougeL": 0.29299029802431953, | |
| "eval_rougeLsum": 0.29317886290919454, | |
| "eval_runtime": 1105.6756, | |
| "eval_samples_per_second": 1.319, | |
| "eval_steps_per_second": 0.22, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0004625090648714786, | |
| "loss": 0.8337, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00046229519174364607, | |
| "loss": 0.8794, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0004620807600552686, | |
| "loss": 0.7694, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0004618657703705277, | |
| "loss": 0.8027, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0004616502232550734, | |
| "loss": 0.8519, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0004614341192760224, | |
| "loss": 0.8001, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0004612174590019562, | |
| "loss": 0.8368, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0004610002430029201, | |
| "loss": 0.8555, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00046078247185042177, | |
| "loss": 0.7932, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00046056414611742903, | |
| "loss": 0.7795, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_bleu": 0.12987137116221253, | |
| "eval_loss": 0.7785532474517822, | |
| "eval_meteor": 0.1977638908170833, | |
| "eval_rouge1": 0.3563200854587399, | |
| "eval_rouge2": 0.19216790450914428, | |
| "eval_rougeL": 0.2927497382733434, | |
| "eval_rougeLsum": 0.2929236969907393, | |
| "eval_runtime": 1319.027, | |
| "eval_samples_per_second": 1.105, | |
| "eval_steps_per_second": 0.184, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00046034526637836926, | |
| "loss": 0.7853, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0004601258332091274, | |
| "loss": 0.7442, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0004599058471870443, | |
| "loss": 0.8214, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00045968530889091555, | |
| "loss": 0.7751, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00045946421890098965, | |
| "loss": 0.8645, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00045924257779896693, | |
| "loss": 0.8341, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00045902038616799746, | |
| "loss": 0.8099, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0004587976445926799, | |
| "loss": 0.8532, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0004585743536590599, | |
| "loss": 0.851, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0004583505139546281, | |
| "loss": 0.8155, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_bleu": 0.1275949150703291, | |
| "eval_loss": 0.7744527459144592, | |
| "eval_meteor": 0.19542313704697203, | |
| "eval_rouge1": 0.36221871637002456, | |
| "eval_rouge2": 0.19723981570527915, | |
| "eval_rougeL": 0.29817828224087256, | |
| "eval_rougeLsum": 0.29819760162358966, | |
| "eval_runtime": 1204.787, | |
| "eval_samples_per_second": 1.21, | |
| "eval_steps_per_second": 0.202, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00045812612606831974, | |
| "loss": 0.7528, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00045790119059051156, | |
| "loss": 0.8188, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0004576757081130216, | |
| "loss": 0.8529, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00045744967922910684, | |
| "loss": 0.7864, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00045722310453346195, | |
| "loss": 0.78, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00045699598462221766, | |
| "loss": 0.813, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0004567683200929391, | |
| "loss": 0.8402, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0004565401115446246, | |
| "loss": 0.8541, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00045631135957770343, | |
| "loss": 0.7645, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00045608206479403484, | |
| "loss": 0.8419, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_bleu": 0.11288874484370615, | |
| "eval_loss": 0.7737380862236023, | |
| "eval_meteor": 0.1795823568139638, | |
| "eval_rouge1": 0.3517171303500152, | |
| "eval_rouge2": 0.19480929623517923, | |
| "eval_rougeL": 0.29489583256807006, | |
| "eval_rougeLsum": 0.2948638738211926, | |
| "eval_runtime": 1083.0127, | |
| "eval_samples_per_second": 1.346, | |
| "eval_steps_per_second": 0.224, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00045585222779690636, | |
| "loss": 0.7908, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0004556218491910321, | |
| "loss": 0.7799, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0004553909295825508, | |
| "loss": 0.7822, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0004551594695790251, | |
| "loss": 0.817, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0004549274697894392, | |
| "loss": 0.7824, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00045469493082419757, | |
| "loss": 0.8274, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0004544618532951231, | |
| "loss": 0.7928, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00045422823781545596, | |
| "loss": 0.8542, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0004539940849998516, | |
| "loss": 0.8367, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00045375939546437916, | |
| "loss": 0.8581, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_bleu": 0.11781251984515774, | |
| "eval_loss": 0.777377724647522, | |
| "eval_meteor": 0.1829209829854384, | |
| "eval_rouge1": 0.35563054870017097, | |
| "eval_rouge2": 0.195963399617126, | |
| "eval_rougeL": 0.2979095627621663, | |
| "eval_rougeLsum": 0.2980344526869577, | |
| "eval_runtime": 1135.666, | |
| "eval_samples_per_second": 1.284, | |
| "eval_steps_per_second": 0.214, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0004535241698265199, | |
| "loss": 0.8475, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.0004532884087051657, | |
| "loss": 0.8985, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0004530521127206173, | |
| "loss": 0.8487, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.0004528152824945827, | |
| "loss": 0.7998, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00045257791865017537, | |
| "loss": 0.7846, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00045234002181191303, | |
| "loss": 0.7838, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00045210159260571553, | |
| "loss": 0.8362, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00045186263165890344, | |
| "loss": 0.8134, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004516231396001965, | |
| "loss": 0.7644, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00045138311705971156, | |
| "loss": 0.8646, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_bleu": 0.13005061015510616, | |
| "eval_loss": 0.7740051746368408, | |
| "eval_meteor": 0.19733944533403236, | |
| "eval_rouge1": 0.3588018183491992, | |
| "eval_rouge2": 0.192617974264134, | |
| "eval_rougeL": 0.2937254663710055, | |
| "eval_rougeLsum": 0.2938043972565847, | |
| "eval_runtime": 1241.046, | |
| "eval_samples_per_second": 1.175, | |
| "eval_steps_per_second": 0.196, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004511425646689615, | |
| "loss": 0.7807, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004509014830608532, | |
| "loss": 0.8442, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004506598728696858, | |
| "loss": 0.8019, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004504177347311492, | |
| "loss": 0.7976, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.0004501750692823224, | |
| "loss": 0.9046, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00044993187716167195, | |
| "loss": 0.7559, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0004496881590090498, | |
| "loss": 0.8358, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00044944391546569213, | |
| "loss": 0.791, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00044919914717421737, | |
| "loss": 0.8007, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0004489538547786246, | |
| "loss": 0.7515, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_bleu": 0.10412061373178255, | |
| "eval_loss": 0.7685180902481079, | |
| "eval_meteor": 0.1663022168419246, | |
| "eval_rouge1": 0.34510339415285696, | |
| "eval_rouge2": 0.19279130187913826, | |
| "eval_rougeL": 0.2909396669204617, | |
| "eval_rougeLsum": 0.29102359815063095, | |
| "eval_runtime": 1022.5977, | |
| "eval_samples_per_second": 1.426, | |
| "eval_steps_per_second": 0.238, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00044870803892429193, | |
| "loss": 0.8091, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0004484617002579745, | |
| "loss": 0.827, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0004482148394278033, | |
| "loss": 0.8435, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00044796745708328297, | |
| "loss": 0.7423, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0004477195538752902, | |
| "loss": 0.8248, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00044747113045607234, | |
| "loss": 0.8593, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0004472221874792454, | |
| "loss": 0.8262, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00044697272559979207, | |
| "loss": 0.7762, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00044672274547406067, | |
| "loss": 0.8237, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0004464722477597629, | |
| "loss": 0.8205, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_bleu": 0.1236377868366298, | |
| "eval_loss": 0.769066572189331, | |
| "eval_meteor": 0.1904404203843731, | |
| "eval_rouge1": 0.36051125596648215, | |
| "eval_rouge2": 0.19601074427606005, | |
| "eval_rougeL": 0.2983201969348075, | |
| "eval_rougeLsum": 0.2983845195227759, | |
| "eval_runtime": 1142.7885, | |
| "eval_samples_per_second": 1.276, | |
| "eval_steps_per_second": 0.213, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0004462212331159724, | |
| "loss": 0.8109, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0004459697022031225, | |
| "loss": 0.7642, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0004457176556830054, | |
| "loss": 0.7603, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0004454650942187695, | |
| "loss": 0.8168, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0004452120184749181, | |
| "loss": 0.8137, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00044495842911730773, | |
| "loss": 0.8485, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0004447043268131462, | |
| "loss": 0.8846, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0004444497122309909, | |
| "loss": 0.7891, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0004441945860407471, | |
| "loss": 0.8096, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.000443938948913666, | |
| "loss": 0.7932, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_bleu": 0.11741021582498118, | |
| "eval_loss": 0.7680177688598633, | |
| "eval_meteor": 0.18724966148417066, | |
| "eval_rouge1": 0.3538289045097152, | |
| "eval_rouge2": 0.1926089993689462, | |
| "eval_rougeL": 0.2952244077253912, | |
| "eval_rougeLsum": 0.2950938047080252, | |
| "eval_runtime": 1113.7012, | |
| "eval_samples_per_second": 1.309, | |
| "eval_steps_per_second": 0.218, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00044368280152234333, | |
| "loss": 0.7672, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00044342614454071714, | |
| "loss": 0.7621, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0004431689786440664, | |
| "loss": 0.8101, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.000442911304509009, | |
| "loss": 0.8431, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0004426531228134999, | |
| "loss": 0.8133, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0004423944342368297, | |
| "loss": 0.8458, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0004421352394596225, | |
| "loss": 0.8306, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00044187553916383445, | |
| "loss": 0.8032, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00044161533403275135, | |
| "loss": 0.8051, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0004413546247509875, | |
| "loss": 0.8578, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_bleu": 0.12595726943541374, | |
| "eval_loss": 0.7692683339118958, | |
| "eval_meteor": 0.19222266255963855, | |
| "eval_rouge1": 0.3581310742460724, | |
| "eval_rouge2": 0.19531037225008183, | |
| "eval_rougeL": 0.2956186541319774, | |
| "eval_rougeLsum": 0.2956367500630852, | |
| "eval_runtime": 1160.1163, | |
| "eval_samples_per_second": 1.257, | |
| "eval_steps_per_second": 0.209, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00044109341200448385, | |
| "loss": 0.7249, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.0004408316964805056, | |
| "loss": 0.8155, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.000440569478867641, | |
| "loss": 0.8433, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 0.00044030675985579917, | |
| "loss": 0.7484, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00044004354013620875, | |
| "loss": 0.8086, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0004397798204014154, | |
| "loss": 0.8796, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00043951560134528056, | |
| "loss": 0.8485, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0004392508836629795, | |
| "loss": 0.7362, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0004389856680509991, | |
| "loss": 0.8347, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.0004387199552071366, | |
| "loss": 0.8119, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_bleu": 0.12373981502065873, | |
| "eval_loss": 0.7634089589118958, | |
| "eval_meteor": 0.18792913443871737, | |
| "eval_rouge1": 0.3586570378567951, | |
| "eval_rouge2": 0.1957026657950927, | |
| "eval_rougeL": 0.29818979034251414, | |
| "eval_rougeLsum": 0.2982401703305406, | |
| "eval_runtime": 1151.9743, | |
| "eval_samples_per_second": 1.266, | |
| "eval_steps_per_second": 0.211, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00043845374583049735, | |
| "loss": 0.7577, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0004381870406214932, | |
| "loss": 0.7928, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0004379198402818403, | |
| "loss": 0.7664, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00043765214551455794, | |
| "loss": 0.7189, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00043738395702396594, | |
| "loss": 0.8276, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.0004371152755156833, | |
| "loss": 0.7872, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00043684610169662607, | |
| "loss": 0.8111, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00043657643627500575, | |
| "loss": 0.8056, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0004363062799603271, | |
| "loss": 0.7623, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00043603563346338644, | |
| "loss": 0.8661, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_bleu": 0.11089234547528978, | |
| "eval_loss": 0.7632281184196472, | |
| "eval_meteor": 0.17341941602705138, | |
| "eval_rouge1": 0.34942191982099435, | |
| "eval_rouge2": 0.19591049653677217, | |
| "eval_rougeL": 0.29526297170998683, | |
| "eval_rougeLsum": 0.2952619744332252, | |
| "eval_runtime": 1071.5418, | |
| "eval_samples_per_second": 1.361, | |
| "eval_steps_per_second": 0.227, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00043576449749627, | |
| "loss": 0.7433, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0004354928727723516, | |
| "loss": 0.7855, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00043522076000629124, | |
| "loss": 0.7527, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00043494815991403275, | |
| "loss": 0.8015, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0004346750732128023, | |
| "loss": 0.7345, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0004344015006211062, | |
| "loss": 0.7952, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0004341274428587294, | |
| "loss": 0.8057, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00043385290064673317, | |
| "loss": 0.8136, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.0004335778747074535, | |
| "loss": 0.8069, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 0.00043330236576449887, | |
| "loss": 0.8397, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_bleu": 0.11830285193176951, | |
| "eval_loss": 0.7616626620292664, | |
| "eval_meteor": 0.1821488344842372, | |
| "eval_rouge1": 0.3558580945132578, | |
| "eval_rouge2": 0.197915707595695, | |
| "eval_rougeL": 0.2981723775850291, | |
| "eval_rougeLsum": 0.29833413809671927, | |
| "eval_runtime": 1132.5137, | |
| "eval_samples_per_second": 1.287, | |
| "eval_steps_per_second": 0.215, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.000433026374542749, | |
| "loss": 0.7386, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00043274990176835217, | |
| "loss": 0.7961, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00043247294816872365, | |
| "loss": 0.8104, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0004321955144725439, | |
| "loss": 0.8091, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00043191760140975666, | |
| "loss": 0.7693, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0004316392097115666, | |
| "loss": 0.8092, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.000431360340110438, | |
| "loss": 0.8053, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00043108099334009234, | |
| "loss": 0.7646, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0004308011701355066, | |
| "loss": 0.8395, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0004305208712329114, | |
| "loss": 0.7852, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_bleu": 0.12483561089578614, | |
| "eval_loss": 0.7647390365600586, | |
| "eval_meteor": 0.19032935944350426, | |
| "eval_rouge1": 0.35835454697825203, | |
| "eval_rouge2": 0.19542840978745862, | |
| "eval_rougeL": 0.29801385574610495, | |
| "eval_rougeLsum": 0.29793876372769, | |
| "eval_runtime": 1120.9038, | |
| "eval_samples_per_second": 1.301, | |
| "eval_steps_per_second": 0.217, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0004302400973697888, | |
| "loss": 0.7485, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00042995884928487054, | |
| "loss": 0.7812, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.00042967712771813614, | |
| "loss": 0.7857, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00042939493341081087, | |
| "loss": 0.8019, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00042911226710536365, | |
| "loss": 0.8257, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00042882912954550544, | |
| "loss": 0.7601, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00042854552147618706, | |
| "loss": 0.7856, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0004282614436435972, | |
| "loss": 0.8138, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.0004279768967951605, | |
| "loss": 0.7765, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00042769188167953565, | |
| "loss": 0.767, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_bleu": 0.1302305276945029, | |
| "eval_loss": 0.7597366571426392, | |
| "eval_meteor": 0.19752698525972517, | |
| "eval_rouge1": 0.36296173703809864, | |
| "eval_rouge2": 0.19816465507239917, | |
| "eval_rougeL": 0.3000226808734052, | |
| "eval_rougeLsum": 0.3000833989034842, | |
| "eval_runtime": 1189.0731, | |
| "eval_samples_per_second": 1.226, | |
| "eval_steps_per_second": 0.204, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0004274063990466135, | |
| "loss": 0.8156, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0004271204496475148, | |
| "loss": 0.7648, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00042683403423458843, | |
| "loss": 0.7364, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.00042654715356140946, | |
| "loss": 0.8329, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0004262598083827769, | |
| "loss": 0.8443, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.000425971999454712, | |
| "loss": 0.8809, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0004256837275344564, | |
| "loss": 0.7959, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.0004253949933804694, | |
| "loss": 0.82, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00042510579775242684, | |
| "loss": 0.8249, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00042481614141121873, | |
| "loss": 0.8284, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_bleu": 0.13696974043564947, | |
| "eval_loss": 0.7628008127212524, | |
| "eval_meteor": 0.20833444182082805, | |
| "eval_rouge1": 0.367375191425503, | |
| "eval_rouge2": 0.1978131466130248, | |
| "eval_rougeL": 0.29990090210288556, | |
| "eval_rougeLsum": 0.3001498394981842, | |
| "eval_runtime": 1199.5655, | |
| "eval_samples_per_second": 1.215, | |
| "eval_steps_per_second": 0.203, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.000424526025118947, | |
| "loss": 0.7842, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00042423544963892393, | |
| "loss": 0.8718, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0004239444157356699, | |
| "loss": 0.8612, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00042365292417491135, | |
| "loss": 0.7878, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.000423360975723579, | |
| "loss": 0.8274, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0004230685711498055, | |
| "loss": 0.8017, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0004227757112229237, | |
| "loss": 0.8154, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.00042248239671346455, | |
| "loss": 0.7849, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 0.0004221886283931549, | |
| "loss": 0.8234, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00042189440703491556, | |
| "loss": 0.7984, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_bleu": 0.11530197031936106, | |
| "eval_loss": 0.755507230758667, | |
| "eval_meteor": 0.1806513827098456, | |
| "eval_rouge1": 0.3555621488323981, | |
| "eval_rouge2": 0.19846639016470374, | |
| "eval_rougeL": 0.29831273382603013, | |
| "eval_rougeLsum": 0.2980805463936066, | |
| "eval_runtime": 1059.4931, | |
| "eval_samples_per_second": 1.376, | |
| "eval_steps_per_second": 0.229, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0004215997334128595, | |
| "loss": 0.8037, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0004213046083022896, | |
| "loss": 0.7687, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00042100903247969647, | |
| "loss": 0.7573, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.00042071300672275676, | |
| "loss": 0.8173, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.0004204165318103307, | |
| "loss": 0.8508, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00042011960852246044, | |
| "loss": 0.8763, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.0004198222376403678, | |
| "loss": 0.8561, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00041952441994645224, | |
| "loss": 0.8034, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00041922615622428885, | |
| "loss": 0.7624, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.000418927447258626, | |
| "loss": 0.8129, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_bleu": 0.1280088834881739, | |
| "eval_loss": 0.7529436945915222, | |
| "eval_meteor": 0.19464046676396524, | |
| "eval_rouge1": 0.3620965211772262, | |
| "eval_rouge2": 0.19866324113960265, | |
| "eval_rougeL": 0.29920122666998356, | |
| "eval_rougeLsum": 0.2993664973235719, | |
| "eval_runtime": 1160.682, | |
| "eval_samples_per_second": 1.256, | |
| "eval_steps_per_second": 0.209, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00041862829383538397, | |
| "loss": 0.8095, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00041832869674165204, | |
| "loss": 0.7788, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00041802865676568695, | |
| "loss": 0.8048, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0004177281746969107, | |
| "loss": 0.8296, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00041742725132590854, | |
| "loss": 0.7797, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0004171258874444266, | |
| "loss": 0.8777, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.0004168240838453702, | |
| "loss": 0.7669, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00041652184132280146, | |
| "loss": 0.831, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00041621916067193746, | |
| "loss": 0.7852, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00041591604268914796, | |
| "loss": 0.7811, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_bleu": 0.12320084852539886, | |
| "eval_loss": 0.7549387216567993, | |
| "eval_meteor": 0.18792325044373648, | |
| "eval_rouge1": 0.35864728570941573, | |
| "eval_rouge2": 0.19706396904795415, | |
| "eval_rougeL": 0.29758291424649863, | |
| "eval_rougeLsum": 0.29778392714680746, | |
| "eval_runtime": 1106.6121, | |
| "eval_samples_per_second": 1.318, | |
| "eval_steps_per_second": 0.22, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0004156124881719533, | |
| "loss": 0.7769, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0004153084979190224, | |
| "loss": 0.7397, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00041500407273017075, | |
| "loss": 0.7779, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.0004146992134063581, | |
| "loss": 0.7955, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00041439392074968617, | |
| "loss": 0.7659, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00041408819556339735, | |
| "loss": 0.8533, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00041378203865187154, | |
| "loss": 0.7967, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00041347545082062476, | |
| "loss": 0.7941, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.0004131684328763069, | |
| "loss": 0.849, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00041286098562669926, | |
| "loss": 0.836, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_bleu": 0.12588415215553295, | |
| "eval_loss": 0.75471031665802, | |
| "eval_meteor": 0.1968519512568269, | |
| "eval_rouge1": 0.36628943428680916, | |
| "eval_rouge2": 0.2000519092857415, | |
| "eval_rougeL": 0.30313942317590103, | |
| "eval_rougeLsum": 0.3031091247198662, | |
| "eval_runtime": 1092.5631, | |
| "eval_samples_per_second": 1.334, | |
| "eval_steps_per_second": 0.222, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00041255310988071284, | |
| "loss": 0.7849, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00041224480644838586, | |
| "loss": 0.7259, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.000411936076140882, | |
| "loss": 0.8354, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0004116269197704881, | |
| "loss": 0.7819, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0004113173381506117, | |
| "loss": 0.8633, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0004110073320957795, | |
| "loss": 0.8141, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0004106969024216348, | |
| "loss": 0.7929, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.0004103860499449355, | |
| "loss": 0.7972, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0004100747754835518, | |
| "loss": 0.8356, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0004097630798564643, | |
| "loss": 0.8168, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_bleu": 0.12183344025510169, | |
| "eval_loss": 0.7511031627655029, | |
| "eval_meteor": 0.18681450779014622, | |
| "eval_rouge1": 0.35671979001980275, | |
| "eval_rouge2": 0.1960218610645066, | |
| "eval_rougeL": 0.29562632322337584, | |
| "eval_rougeLsum": 0.2957310907035756, | |
| "eval_runtime": 1123.9062, | |
| "eval_samples_per_second": 1.297, | |
| "eval_steps_per_second": 0.216, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0004094509638837617, | |
| "loss": 0.7949, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.0004091384283866385, | |
| "loss": 0.8108, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00040882547418739316, | |
| "loss": 0.6972, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00040851210210942577, | |
| "loss": 0.7515, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00040819831297723573, | |
| "loss": 0.7821, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0004078841076164199, | |
| "loss": 0.7728, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0004075694868536701, | |
| "loss": 0.7493, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00040725445151677136, | |
| "loss": 0.8138, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0004069390024345991, | |
| "loss": 0.8215, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0004066231404371177, | |
| "loss": 0.8057, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_bleu": 0.12330222084393866, | |
| "eval_loss": 0.7514679431915283, | |
| "eval_meteor": 0.19033581377995815, | |
| "eval_rouge1": 0.3581105465101981, | |
| "eval_rouge2": 0.19665944172196212, | |
| "eval_rougeL": 0.2981881930811607, | |
| "eval_rougeLsum": 0.2979884824891669, | |
| "eval_runtime": 1170.8391, | |
| "eval_samples_per_second": 1.245, | |
| "eval_steps_per_second": 0.208, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00040630686635537773, | |
| "loss": 0.7275, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.000405990181021514, | |
| "loss": 0.7879, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00040567308526874324, | |
| "loss": 0.7324, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.00040535557993136236, | |
| "loss": 0.7797, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0004050376658447456, | |
| "loss": 0.7792, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0004047193438453427, | |
| "loss": 0.7735, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0004044006147706767, | |
| "loss": 0.7758, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00040408147945934173, | |
| "loss": 0.8429, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.00040376193875100053, | |
| "loss": 0.7891, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0004034419934863828, | |
| "loss": 0.8045, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_bleu": 0.12057087168942168, | |
| "eval_loss": 0.7541698217391968, | |
| "eval_meteor": 0.1864159375566591, | |
| "eval_rouge1": 0.3593783361406444, | |
| "eval_rouge2": 0.19683759603742187, | |
| "eval_rougeL": 0.29803496391685336, | |
| "eval_rougeLsum": 0.29788976506923015, | |
| "eval_runtime": 1127.7837, | |
| "eval_samples_per_second": 1.293, | |
| "eval_steps_per_second": 0.215, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0004031216445072822, | |
| "loss": 0.7893, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0004028008926565551, | |
| "loss": 0.8821, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0004024797387781175, | |
| "loss": 0.8032, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0004021581837169432, | |
| "loss": 0.7978, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00040183622831906166, | |
| "loss": 0.8345, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0004015138734315554, | |
| "loss": 0.7948, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0004011911199025584, | |
| "loss": 0.7712, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00040086796858125324, | |
| "loss": 0.8137, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00040054442031786907, | |
| "loss": 0.7523, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0004002204759636796, | |
| "loss": 0.7927, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_bleu": 0.12228391385106198, | |
| "eval_loss": 0.7471486926078796, | |
| "eval_meteor": 0.19151605381653838, | |
| "eval_rouge1": 0.3620636405755351, | |
| "eval_rouge2": 0.19861702778304668, | |
| "eval_rougeL": 0.30207238821110516, | |
| "eval_rougeLsum": 0.3019676001231871, | |
| "eval_runtime": 1147.7894, | |
| "eval_samples_per_second": 1.27, | |
| "eval_steps_per_second": 0.212, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00039989613637100055, | |
| "loss": 0.7512, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00039957140239318744, | |
| "loss": 0.7385, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00039924627488463374, | |
| "loss": 0.8469, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00039892075470076795, | |
| "loss": 0.72, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0003985948426980521, | |
| "loss": 0.797, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0003982685397339789, | |
| "loss": 0.7778, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00039794184666706964, | |
| "loss": 0.7285, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0003976147643568721, | |
| "loss": 0.7779, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00039728729366395824, | |
| "loss": 0.7841, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.00039695943544992173, | |
| "loss": 0.8402, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_bleu": 0.11653429141819567, | |
| "eval_loss": 0.7500145435333252, | |
| "eval_meteor": 0.18259693460048834, | |
| "eval_rouge1": 0.35693896022311644, | |
| "eval_rouge2": 0.19481212920926488, | |
| "eval_rougeL": 0.2974158389948098, | |
| "eval_rougeLsum": 0.2972789083405306, | |
| "eval_runtime": 1127.326, | |
| "eval_samples_per_second": 1.293, | |
| "eval_steps_per_second": 0.216, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.000396631190577376, | |
| "loss": 0.8434, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0003963025599099516, | |
| "loss": 0.8225, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 0.0003959735443122943, | |
| "loss": 0.7828, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00039564414465006244, | |
| "loss": 0.7987, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00039531436178992513, | |
| "loss": 0.7857, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0003949841965995595, | |
| "loss": 0.7992, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0003946536499476487, | |
| "loss": 0.788, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00039432272270387955, | |
| "loss": 0.769, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00039399141573893997, | |
| "loss": 0.8262, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.00039365972992451735, | |
| "loss": 0.7963, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_bleu": 0.12318553450668913, | |
| "eval_loss": 0.7483591437339783, | |
| "eval_meteor": 0.1913410867293855, | |
| "eval_rouge1": 0.3654868855873549, | |
| "eval_rouge2": 0.20050423202844517, | |
| "eval_rougeL": 0.30447787352072553, | |
| "eval_rougeLsum": 0.30429425068099136, | |
| "eval_runtime": 1124.7101, | |
| "eval_samples_per_second": 1.296, | |
| "eval_steps_per_second": 0.216, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0003933276661332955, | |
| "loss": 0.7798, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00039299522523895296, | |
| "loss": 0.8611, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0003926624081161604, | |
| "loss": 0.8131, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0003923292156405781, | |
| "loss": 0.7202, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0003919956486888544, | |
| "loss": 0.7797, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.0003916617081386225, | |
| "loss": 0.7561, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0003913273948684987, | |
| "loss": 0.71, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00039099270975808, | |
| "loss": 0.7608, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.0003906576536879416, | |
| "loss": 0.8031, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00039032222753963483, | |
| "loss": 0.8034, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_bleu": 0.11720116971140243, | |
| "eval_loss": 0.7478321194648743, | |
| "eval_meteor": 0.1819934943700329, | |
| "eval_rouge1": 0.35727692353329465, | |
| "eval_rouge2": 0.19816847975598717, | |
| "eval_rougeL": 0.29895230165351805, | |
| "eval_rougeLsum": 0.29907502151518195, | |
| "eval_runtime": 1070.5188, | |
| "eval_samples_per_second": 1.362, | |
| "eval_steps_per_second": 0.227, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00038998643219568467, | |
| "loss": 0.7886, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00038965026853958755, | |
| "loss": 0.7854, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00038931373745580884, | |
| "loss": 0.7956, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0003889768398297807, | |
| "loss": 0.7957, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00038863957654789957, | |
| "loss": 0.7563, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0003883019484975241, | |
| "loss": 0.7558, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00038796395656697267, | |
| "loss": 0.797, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00038762560164552095, | |
| "loss": 0.7864, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.0003872868846233997, | |
| "loss": 0.7932, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0003869478063917924, | |
| "loss": 0.7569, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_bleu": 0.12021270355030027, | |
| "eval_loss": 0.7468777298927307, | |
| "eval_meteor": 0.18865042542151908, | |
| "eval_rouge1": 0.36340810125388445, | |
| "eval_rouge2": 0.20321855929268942, | |
| "eval_rougeL": 0.3042800348780287, | |
| "eval_rougeLsum": 0.3041477067076571, | |
| "eval_runtime": 1060.9151, | |
| "eval_samples_per_second": 1.374, | |
| "eval_steps_per_second": 0.229, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0003866083678428328, | |
| "loss": 0.7893, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0003862685698696028, | |
| "loss": 0.7841, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0003859284133661299, | |
| "loss": 0.7696, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0003855878992273849, | |
| "loss": 0.7964, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0003852470283492796, | |
| "loss": 0.7731, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0003849058016286644, | |
| "loss": 0.7562, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00038456421996332593, | |
| "loss": 0.7756, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00038422228425198456, | |
| "loss": 0.7327, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.00038387999539429255, | |
| "loss": 0.7831, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0003835373542908308, | |
| "loss": 0.7728, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_bleu": 0.13571042313085763, | |
| "eval_loss": 0.7441371083259583, | |
| "eval_meteor": 0.20429787752537404, | |
| "eval_rouge1": 0.36910530156190763, | |
| "eval_rouge2": 0.20076171169403834, | |
| "eval_rougeL": 0.3028160316079058, | |
| "eval_rougeLsum": 0.3028887886618019, | |
| "eval_runtime": 1240.9106, | |
| "eval_samples_per_second": 1.175, | |
| "eval_steps_per_second": 0.196, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0003831943618431074, | |
| "loss": 0.8109, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 0.0003828510189535548, | |
| "loss": 0.7687, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00038250732652552713, | |
| "loss": 0.7796, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00038216328546329854, | |
| "loss": 0.7713, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00038181889667206036, | |
| "loss": 0.8039, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0003814741610579189, | |
| "loss": 0.7761, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00038112907952789264, | |
| "loss": 0.7536, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.0003807836529899106, | |
| "loss": 0.7478, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00038043788235280927, | |
| "loss": 0.7639, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0003800917685263307, | |
| "loss": 0.7624, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_bleu": 0.13360665201533722, | |
| "eval_loss": 0.743972659111023, | |
| "eval_meteor": 0.19919552001100382, | |
| "eval_rouge1": 0.3659102912435709, | |
| "eval_rouge2": 0.19789641111146775, | |
| "eval_rougeL": 0.3016512273674288, | |
| "eval_rougeLsum": 0.3015437367125981, | |
| "eval_runtime": 1272.3138, | |
| "eval_samples_per_second": 1.146, | |
| "eval_steps_per_second": 0.191, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0003797453124211196, | |
| "loss": 0.7455, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.0003793985149487215, | |
| "loss": 0.7817, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00037905137702158, | |
| "loss": 0.7936, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00037870389955303426, | |
| "loss": 0.7884, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00037835608345731717, | |
| "loss": 0.7477, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0003780079296495523, | |
| "loss": 0.7333, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0003776594390457517, | |
| "loss": 0.7712, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00037731061256281395, | |
| "loss": 0.8028, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0003769614511185209, | |
| "loss": 0.836, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.00037661195563153577, | |
| "loss": 0.7102, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_bleu": 0.13683765315402233, | |
| "eval_loss": 0.7432180643081665, | |
| "eval_meteor": 0.20768677295384516, | |
| "eval_rouge1": 0.3735959078332925, | |
| "eval_rouge2": 0.20419374346780084, | |
| "eval_rougeL": 0.30712118478863093, | |
| "eval_rougeLsum": 0.30707788341285575, | |
| "eval_runtime": 1270.0874, | |
| "eval_samples_per_second": 1.148, | |
| "eval_steps_per_second": 0.191, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.000376262127021401, | |
| "loss": 0.7216, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00037591196620853515, | |
| "loss": 0.7167, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.0003755614741142309, | |
| "loss": 0.7174, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.0003752106516606526, | |
| "loss": 0.7206, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.0003748594997708339, | |
| "loss": 0.7271, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.00037450801936867497, | |
| "loss": 0.7166, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.0003741562113789405, | |
| "loss": 0.6894, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.000373804076727257, | |
| "loss": 0.7399, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0003734516163401105, | |
| "loss": 0.7341, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00037309883114484407, | |
| "loss": 0.6979, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_bleu": 0.11959061229637678, | |
| "eval_loss": 0.7399081587791443, | |
| "eval_meteor": 0.18578293382867828, | |
| "eval_rouge1": 0.35998311194622934, | |
| "eval_rouge2": 0.2008245839204704, | |
| "eval_rougeL": 0.30212159744533995, | |
| "eval_rougeLsum": 0.30208186381396035, | |
| "eval_runtime": 1117.6606, | |
| "eval_samples_per_second": 1.305, | |
| "eval_steps_per_second": 0.217, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00037274572206965516, | |
| "loss": 0.695, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0003723922900435937, | |
| "loss": 0.7373, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00037203853599655914, | |
| "loss": 0.7002, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0003716844608592981, | |
| "loss": 0.7566, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00037133006556340216, | |
| "loss": 0.7111, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0003709753510413052, | |
| "loss": 0.745, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00037062031822628094, | |
| "loss": 0.6765, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0003702649680524408, | |
| "loss": 0.7619, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00036990930145473083, | |
| "loss": 0.6821, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0003695533193689298, | |
| "loss": 0.7149, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_bleu": 0.12635236721625973, | |
| "eval_loss": 0.739450216293335, | |
| "eval_meteor": 0.19553725175716402, | |
| "eval_rouge1": 0.365661266915583, | |
| "eval_rouge2": 0.20178360342416046, | |
| "eval_rougeL": 0.3026326239453274, | |
| "eval_rougeLsum": 0.30259399461990677, | |
| "eval_runtime": 1155.7274, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 0.21, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.00036919702273164657, | |
| "loss": 0.7377, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00036884041248031753, | |
| "loss": 0.7444, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0003684834895532042, | |
| "loss": 0.7286, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0003681262548893909, | |
| "loss": 0.7449, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0003677687094287819, | |
| "loss": 0.6915, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.0003674108541120995, | |
| "loss": 0.7031, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 0.00036705268988088103, | |
| "loss": 0.7142, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00036669421767747656, | |
| "loss": 0.7086, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0003663354384450467, | |
| "loss": 0.7481, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00036597635312755954, | |
| "loss": 0.6722, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_bleu": 0.11414956111209436, | |
| "eval_loss": 0.7422959804534912, | |
| "eval_meteor": 0.18158717314624995, | |
| "eval_rouge1": 0.35938872641078123, | |
| "eval_rouge2": 0.20238339161949742, | |
| "eval_rougeL": 0.3036388613445834, | |
| "eval_rougeLsum": 0.30368724785496093, | |
| "eval_runtime": 1059.4123, | |
| "eval_samples_per_second": 1.376, | |
| "eval_steps_per_second": 0.229, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0003656169626697889, | |
| "loss": 0.6965, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.000365257268017311, | |
| "loss": 0.7239, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.0003648972701165027, | |
| "loss": 0.7147, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 0.00036453696991453865, | |
| "loss": 0.6588, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.0003641763683593889, | |
| "loss": 0.6452, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.0003638154663998163, | |
| "loss": 0.7578, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00036345426498537417, | |
| "loss": 0.6807, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00036309276506640365, | |
| "loss": 0.7922, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00036273096759403123, | |
| "loss": 0.6959, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.0003623688735201664, | |
| "loss": 0.7319, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_bleu": 0.13311801380318097, | |
| "eval_loss": 0.739512026309967, | |
| "eval_meteor": 0.20301601278830728, | |
| "eval_rouge1": 0.3697671294885042, | |
| "eval_rouge2": 0.2040980609334162, | |
| "eval_rougeL": 0.30591621894549137, | |
| "eval_rougeLsum": 0.30575850009870087, | |
| "eval_runtime": 1168.6213, | |
| "eval_samples_per_second": 1.248, | |
| "eval_steps_per_second": 0.208, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00036200648379749903, | |
| "loss": 0.7169, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00036164379937949666, | |
| "loss": 0.7035, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00036128082122040224, | |
| "loss": 0.6929, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0003609175502752319, | |
| "loss": 0.7502, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0003605539874997716, | |
| "loss": 0.729, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00036019013385057557, | |
| "loss": 0.6907, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.00035982599028496306, | |
| "loss": 0.6899, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00035946155776101613, | |
| "loss": 0.7194, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.0003590968372375774, | |
| "loss": 0.6805, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00035873182967424667, | |
| "loss": 0.6992, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_bleu": 0.11900569290924122, | |
| "eval_loss": 0.7383832335472107, | |
| "eval_meteor": 0.18448493712506533, | |
| "eval_rouge1": 0.35725738552943453, | |
| "eval_rouge2": 0.19755022515559825, | |
| "eval_rougeL": 0.2990729972948073, | |
| "eval_rougeLsum": 0.2989527020663407, | |
| "eval_runtime": 1155.0098, | |
| "eval_samples_per_second": 1.262, | |
| "eval_steps_per_second": 0.21, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.00035836653603137954, | |
| "loss": 0.6816, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.000358000957270084, | |
| "loss": 0.707, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 0.0003576350943522182, | |
| "loss": 0.6911, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.000357268948240388, | |
| "loss": 0.6851, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00035690251989794444, | |
| "loss": 0.742, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.0003565358102889809, | |
| "loss": 0.7222, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00035616882037833083, | |
| "loss": 0.6707, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.00035580155113156545, | |
| "loss": 0.717, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.0003554340035149906, | |
| "loss": 0.6809, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 0.0003550661784956447, | |
| "loss": 0.699, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_bleu": 0.13124721878731666, | |
| "eval_loss": 0.7341772317886353, | |
| "eval_meteor": 0.20086043758202302, | |
| "eval_rouge1": 0.3665359469102716, | |
| "eval_rouge2": 0.20045162880972417, | |
| "eval_rougeL": 0.30219876970116155, | |
| "eval_rougeLsum": 0.30216060698118885, | |
| "eval_runtime": 1217.1151, | |
| "eval_samples_per_second": 1.198, | |
| "eval_steps_per_second": 0.2, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00035469807704129595, | |
| "loss": 0.7358, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00035432970012044005, | |
| "loss": 0.7044, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00035396104870229705, | |
| "loss": 0.7466, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0003535921237568097, | |
| "loss": 0.7178, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.00035322292625464014, | |
| "loss": 0.7379, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.0003528534571671677, | |
| "loss": 0.6904, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00035248371746648624, | |
| "loss": 0.7317, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.0003521137081254016, | |
| "loss": 0.7052, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00035174343011742915, | |
| "loss": 0.756, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.000351372884416791, | |
| "loss": 0.7159, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_bleu": 0.13246869374366876, | |
| "eval_loss": 0.7347835898399353, | |
| "eval_meteor": 0.20244538384279492, | |
| "eval_rouge1": 0.37153124241895075, | |
| "eval_rouge2": 0.20420649618044395, | |
| "eval_rougeL": 0.3075002644877919, | |
| "eval_rougeLsum": 0.3076795818578708, | |
| "eval_runtime": 1225.5167, | |
| "eval_samples_per_second": 1.19, | |
| "eval_steps_per_second": 0.198, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00035100207199841374, | |
| "loss": 0.6935, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.0003506309938379255, | |
| "loss": 0.7689, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 0.00035025965091165385, | |
| "loss": 0.7423, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0003498880441966228, | |
| "loss": 0.6649, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0003495161746705503, | |
| "loss": 0.7144, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0003491440433118462, | |
| "loss": 0.6854, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00034877165109960863, | |
| "loss": 0.739, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0003483989990136226, | |
| "loss": 0.6962, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.0003480260880343565, | |
| "loss": 0.7414, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0003476529191429601, | |
| "loss": 0.7418, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_bleu": 0.12629642603617014, | |
| "eval_loss": 0.7300452589988708, | |
| "eval_meteor": 0.19607433639658048, | |
| "eval_rouge1": 0.36940732143709704, | |
| "eval_rouge2": 0.20324909716054756, | |
| "eval_rougeL": 0.30713128476276175, | |
| "eval_rougeLsum": 0.3072824822901492, | |
| "eval_runtime": 1148.1498, | |
| "eval_samples_per_second": 1.27, | |
| "eval_steps_per_second": 0.212, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0003472794933212616, | |
| "loss": 0.7181, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0003469058115517652, | |
| "loss": 0.7546, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00034653187481764873, | |
| "loss": 0.736, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00034615768410276065, | |
| "loss": 0.744, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.0003457832403916177, | |
| "loss": 0.7563, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.00034540854466940215, | |
| "loss": 0.6738, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0003450335979219595, | |
| "loss": 0.7102, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0003446584011357957, | |
| "loss": 0.7419, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0003442829552980746, | |
| "loss": 0.7521, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.000343907261396615, | |
| "loss": 0.6713, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_bleu": 0.12662344706646492, | |
| "eval_loss": 0.7302644848823547, | |
| "eval_meteor": 0.19680753344212212, | |
| "eval_rouge1": 0.370672782958349, | |
| "eval_rouge2": 0.2051175852415017, | |
| "eval_rougeL": 0.30716206152120107, | |
| "eval_rougeLsum": 0.30713312439209517, | |
| "eval_runtime": 1127.3029, | |
| "eval_samples_per_second": 1.293, | |
| "eval_steps_per_second": 0.216, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00034353132041988876, | |
| "loss": 0.7622, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.00034315513335701764, | |
| "loss": 0.6964, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0003427787011977709, | |
| "loss": 0.7532, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00034240202493256264, | |
| "loss": 0.6931, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0003420251055524491, | |
| "loss": 0.7325, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0003416479440491264, | |
| "loss": 0.6884, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.00034127054141492756, | |
| "loss": 0.7377, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0003408928986428202, | |
| "loss": 0.7091, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 0.0003405150167264034, | |
| "loss": 0.7379, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.0003401368966599057, | |
| "loss": 0.704, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_bleu": 0.1258064652767695, | |
| "eval_loss": 0.7285297513008118, | |
| "eval_meteor": 0.19691865175723794, | |
| "eval_rouge1": 0.36778390805748723, | |
| "eval_rouge2": 0.20311221027278986, | |
| "eval_rougeL": 0.3054394126025268, | |
| "eval_rougeLsum": 0.3054229081295555, | |
| "eval_runtime": 1127.1968, | |
| "eval_samples_per_second": 1.293, | |
| "eval_steps_per_second": 0.216, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00033975853943818223, | |
| "loss": 0.7004, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00033937994605671214, | |
| "loss": 0.7505, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.0003390011175115956, | |
| "loss": 0.7212, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.0003386220547995519, | |
| "loss": 0.7163, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 0.00033824275891791624, | |
| "loss": 0.7683, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00033786323086463734, | |
| "loss": 0.6846, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0003374834716382748, | |
| "loss": 0.7276, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00033710348223799634, | |
| "loss": 0.7359, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00033672326366357544, | |
| "loss": 0.7125, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00033634281691538847, | |
| "loss": 0.7155, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_bleu": 0.12742902408441137, | |
| "eval_loss": 0.7300394773483276, | |
| "eval_meteor": 0.19972564219243125, | |
| "eval_rouge1": 0.36676091746300093, | |
| "eval_rouge2": 0.20014907900892553, | |
| "eval_rougeL": 0.302702557140773, | |
| "eval_rougeLsum": 0.30281701017902063, | |
| "eval_runtime": 1175.8896, | |
| "eval_samples_per_second": 1.24, | |
| "eval_steps_per_second": 0.207, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.00033596214299441213, | |
| "loss": 0.6816, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0003355812429022208, | |
| "loss": 0.656, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.000335200117640984, | |
| "loss": 0.7309, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00033481876821346367, | |
| "loss": 0.7137, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.0003344371956230114, | |
| "loss": 0.7229, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.0003340554008735663, | |
| "loss": 0.7312, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.0003336733849696516, | |
| "loss": 0.6824, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00033329114891637244, | |
| "loss": 0.7157, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00033290869371941343, | |
| "loss": 0.7378, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.0003325260203850357, | |
| "loss": 0.7284, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_bleu": 0.1279752717045045, | |
| "eval_loss": 0.7327857613563538, | |
| "eval_meteor": 0.1978933380981099, | |
| "eval_rouge1": 0.3660675412873057, | |
| "eval_rouge2": 0.2007737061001636, | |
| "eval_rougeL": 0.3036859494669802, | |
| "eval_rougeLsum": 0.3035956514223758, | |
| "eval_runtime": 1203.7754, | |
| "eval_samples_per_second": 1.211, | |
| "eval_steps_per_second": 0.202, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.000332143129920074, | |
| "loss": 0.7286, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00033176002333193475, | |
| "loss": 0.7142, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.0003313767016285929, | |
| "loss": 0.7226, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.00033099316581858924, | |
| "loss": 0.6984, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.000330609416911028, | |
| "loss": 0.7486, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0003302254559155741, | |
| "loss": 0.6951, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0003298412838424503, | |
| "loss": 0.6734, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.00032945690170243494, | |
| "loss": 0.7295, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.000329072310506859, | |
| "loss": 0.686, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0003286875112676035, | |
| "loss": 0.6969, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_bleu": 0.12667328365779612, | |
| "eval_loss": 0.730004072189331, | |
| "eval_meteor": 0.19859259473423635, | |
| "eval_rouge1": 0.36608475297722565, | |
| "eval_rouge2": 0.20181171212849097, | |
| "eval_rougeL": 0.3048158401257285, | |
| "eval_rougeLsum": 0.30473273024993836, | |
| "eval_runtime": 1121.2619, | |
| "eval_samples_per_second": 1.3, | |
| "eval_steps_per_second": 0.217, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 0.0003283025049970967, | |
| "loss": 0.7053, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0003279172927083117, | |
| "loss": 0.7112, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00032753187541476357, | |
| "loss": 0.7294, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0003271462541305069, | |
| "loss": 0.7703, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00032676042987013287, | |
| "loss": 0.7219, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.0003263744036487667, | |
| "loss": 0.7527, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.000325988176482065, | |
| "loss": 0.7469, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00032560174938621326, | |
| "loss": 0.7235, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00032521512337792247, | |
| "loss": 0.7821, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.0003248282994744276, | |
| "loss": 0.7279, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_bleu": 0.13693040140777551, | |
| "eval_loss": 0.728911280632019, | |
| "eval_meteor": 0.20888724574067633, | |
| "eval_rouge1": 0.3680729526895363, | |
| "eval_rouge2": 0.2008207536043628, | |
| "eval_rougeL": 0.3017495392967735, | |
| "eval_rougeLsum": 0.30193972403551483, | |
| "eval_runtime": 1263.8659, | |
| "eval_samples_per_second": 1.154, | |
| "eval_steps_per_second": 0.192, | |
| "step": 7900 | |
| } | |
| ], | |
| "max_steps": 19458, | |
| "num_train_epochs": 3, | |
| "total_flos": 2.3091196043722752e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |