| { | |
| "best_metric": 2.132361888885498, | |
| "best_model_checkpoint": "./26-125356_megasuperkanin/checkpoint-100000", | |
| "epoch": 0.9769822970807769, | |
| "global_step": 100000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 5e-05, | |
| "loss": 2.6761, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 5e-05, | |
| "loss": 2.551, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_gen_len": 28.4674, | |
| "eval_loss": 2.423037052154541, | |
| "eval_rouge1": 0.214, | |
| "eval_rouge2": 0.0668, | |
| "eval_rougeL": 0.1717, | |
| "eval_rougeLsum": 0.1777, | |
| "eval_runtime": 1015.6418, | |
| "eval_samples_per_second": 2.265, | |
| "eval_steps_per_second": 0.284, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 5e-05, | |
| "loss": 2.5186, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 5e-05, | |
| "loss": 2.4717, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_gen_len": 25.6604, | |
| "eval_loss": 2.3709843158721924, | |
| "eval_rouge1": 0.2071, | |
| "eval_rouge2": 0.0634, | |
| "eval_rougeL": 0.1686, | |
| "eval_rougeLsum": 0.1745, | |
| "eval_runtime": 951.1096, | |
| "eval_samples_per_second": 2.418, | |
| "eval_steps_per_second": 0.303, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 5e-05, | |
| "loss": 2.4593, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 5e-05, | |
| "loss": 2.4281, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_gen_len": 28.8296, | |
| "eval_loss": 2.3228819370269775, | |
| "eval_rouge1": 0.2137, | |
| "eval_rouge2": 0.0662, | |
| "eval_rougeL": 0.1711, | |
| "eval_rougeLsum": 0.1768, | |
| "eval_runtime": 1022.9494, | |
| "eval_samples_per_second": 2.248, | |
| "eval_steps_per_second": 0.282, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 5e-05, | |
| "loss": 2.4049, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3735, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_gen_len": 29.9183, | |
| "eval_loss": 2.2881429195404053, | |
| "eval_rouge1": 0.2164, | |
| "eval_rouge2": 0.0668, | |
| "eval_rougeL": 0.1735, | |
| "eval_rougeLsum": 0.1808, | |
| "eval_runtime": 1036.2984, | |
| "eval_samples_per_second": 2.219, | |
| "eval_steps_per_second": 0.278, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3732, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 5e-05, | |
| "loss": 2.377, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_gen_len": 29.5183, | |
| "eval_loss": 2.2759358882904053, | |
| "eval_rouge1": 0.2209, | |
| "eval_rouge2": 0.0694, | |
| "eval_rougeL": 0.1782, | |
| "eval_rougeLsum": 0.1851, | |
| "eval_runtime": 1036.1071, | |
| "eval_samples_per_second": 2.22, | |
| "eval_steps_per_second": 0.278, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3513, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3444, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_gen_len": 29.3183, | |
| "eval_loss": 2.2552034854888916, | |
| "eval_rouge1": 0.2194, | |
| "eval_rouge2": 0.0679, | |
| "eval_rougeL": 0.1757, | |
| "eval_rougeLsum": 0.1829, | |
| "eval_runtime": 1037.4604, | |
| "eval_samples_per_second": 2.217, | |
| "eval_steps_per_second": 0.278, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3504, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3203, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_gen_len": 32.2061, | |
| "eval_loss": 2.235518455505371, | |
| "eval_rouge1": 0.2284, | |
| "eval_rouge2": 0.0722, | |
| "eval_rougeL": 0.1819, | |
| "eval_rougeLsum": 0.1892, | |
| "eval_runtime": 1121.1561, | |
| "eval_samples_per_second": 2.051, | |
| "eval_steps_per_second": 0.257, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3087, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3132, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_gen_len": 29.5452, | |
| "eval_loss": 2.2289836406707764, | |
| "eval_rouge1": 0.2183, | |
| "eval_rouge2": 0.0673, | |
| "eval_rougeL": 0.1759, | |
| "eval_rougeLsum": 0.1827, | |
| "eval_runtime": 1055.2895, | |
| "eval_samples_per_second": 2.179, | |
| "eval_steps_per_second": 0.273, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3063, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3116, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_gen_len": 30.2935, | |
| "eval_loss": 2.218207359313965, | |
| "eval_rouge1": 0.2239, | |
| "eval_rouge2": 0.07, | |
| "eval_rougeL": 0.1798, | |
| "eval_rougeLsum": 0.1879, | |
| "eval_runtime": 1063.5185, | |
| "eval_samples_per_second": 2.163, | |
| "eval_steps_per_second": 0.271, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 5e-05, | |
| "loss": 2.3014, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2852, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_gen_len": 28.6443, | |
| "eval_loss": 2.2090706825256348, | |
| "eval_rouge1": 0.2251, | |
| "eval_rouge2": 0.0703, | |
| "eval_rougeL": 0.1812, | |
| "eval_rougeLsum": 0.1887, | |
| "eval_runtime": 1045.7282, | |
| "eval_samples_per_second": 2.199, | |
| "eval_steps_per_second": 0.275, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2963, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2683, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_gen_len": 29.9661, | |
| "eval_loss": 2.1879115104675293, | |
| "eval_rouge1": 0.2257, | |
| "eval_rouge2": 0.0716, | |
| "eval_rougeL": 0.1806, | |
| "eval_rougeLsum": 0.1876, | |
| "eval_runtime": 1061.3075, | |
| "eval_samples_per_second": 2.167, | |
| "eval_steps_per_second": 0.271, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2735, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2614, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_gen_len": 30.4435, | |
| "eval_loss": 2.1871089935302734, | |
| "eval_rouge1": 0.2316, | |
| "eval_rouge2": 0.075, | |
| "eval_rougeL": 0.1863, | |
| "eval_rougeLsum": 0.1936, | |
| "eval_runtime": 1083.7377, | |
| "eval_samples_per_second": 2.122, | |
| "eval_steps_per_second": 0.266, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2735, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 5e-05, | |
| "loss": 2.252, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_gen_len": 30.6239, | |
| "eval_loss": 2.175469160079956, | |
| "eval_rouge1": 0.226, | |
| "eval_rouge2": 0.0729, | |
| "eval_rougeL": 0.1834, | |
| "eval_rougeLsum": 0.1914, | |
| "eval_runtime": 1080.4009, | |
| "eval_samples_per_second": 2.129, | |
| "eval_steps_per_second": 0.267, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2509, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5e-05, | |
| "loss": 2.262, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_gen_len": 30.9983, | |
| "eval_loss": 2.16789174079895, | |
| "eval_rouge1": 0.2256, | |
| "eval_rouge2": 0.0716, | |
| "eval_rougeL": 0.1815, | |
| "eval_rougeLsum": 0.1889, | |
| "eval_runtime": 1104.0224, | |
| "eval_samples_per_second": 2.083, | |
| "eval_steps_per_second": 0.261, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2398, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 5e-05, | |
| "loss": 2.228, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_gen_len": 29.9704, | |
| "eval_loss": 2.1669178009033203, | |
| "eval_rouge1": 0.2253, | |
| "eval_rouge2": 0.0725, | |
| "eval_rougeL": 0.1822, | |
| "eval_rougeLsum": 0.1894, | |
| "eval_runtime": 1052.7669, | |
| "eval_samples_per_second": 2.185, | |
| "eval_steps_per_second": 0.274, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 5e-05, | |
| "loss": 2.25, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 5e-05, | |
| "loss": 2.234, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_gen_len": 29.4826, | |
| "eval_loss": 2.1604671478271484, | |
| "eval_rouge1": 0.2283, | |
| "eval_rouge2": 0.0747, | |
| "eval_rougeL": 0.1855, | |
| "eval_rougeLsum": 0.1937, | |
| "eval_runtime": 1075.8159, | |
| "eval_samples_per_second": 2.138, | |
| "eval_steps_per_second": 0.268, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5e-05, | |
| "loss": 2.236, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2289, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_gen_len": 30.0213, | |
| "eval_loss": 2.1517326831817627, | |
| "eval_rouge1": 0.2226, | |
| "eval_rouge2": 0.0705, | |
| "eval_rougeL": 0.1801, | |
| "eval_rougeLsum": 0.1873, | |
| "eval_runtime": 1072.8178, | |
| "eval_samples_per_second": 2.144, | |
| "eval_steps_per_second": 0.268, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2214, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2043, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_gen_len": 29.5361, | |
| "eval_loss": 2.1455490589141846, | |
| "eval_rouge1": 0.2265, | |
| "eval_rouge2": 0.075, | |
| "eval_rougeL": 0.1838, | |
| "eval_rougeLsum": 0.1908, | |
| "eval_runtime": 1058.731, | |
| "eval_samples_per_second": 2.172, | |
| "eval_steps_per_second": 0.272, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2419, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2259, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_gen_len": 29.6874, | |
| "eval_loss": 2.1389129161834717, | |
| "eval_rouge1": 0.2287, | |
| "eval_rouge2": 0.0713, | |
| "eval_rougeL": 0.1844, | |
| "eval_rougeLsum": 0.1911, | |
| "eval_runtime": 1069.2344, | |
| "eval_samples_per_second": 2.151, | |
| "eval_steps_per_second": 0.269, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2202, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5e-05, | |
| "loss": 2.2307, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_gen_len": 30.7513, | |
| "eval_loss": 2.132361888885498, | |
| "eval_rouge1": 0.2293, | |
| "eval_rouge2": 0.0741, | |
| "eval_rougeL": 0.1845, | |
| "eval_rougeLsum": 0.1924, | |
| "eval_runtime": 1089.9927, | |
| "eval_samples_per_second": 2.11, | |
| "eval_steps_per_second": 0.264, | |
| "step": 100000 | |
| } | |
| ], | |
| "max_steps": 102356, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.8696291573252096e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |