| { | |
| "best_global_step": 1218, | |
| "best_metric": 0.9136507936507936, | |
| "best_model_checkpoint": "./my_unified_model_classification_latest_only_eng/checkpoint-1218", | |
| "epoch": 2.0, | |
| "eval_steps": 203, | |
| "global_step": 1360, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07352941176470588, | |
| "grad_norm": 61.0, | |
| "learning_rate": 3.6296296296296302e-06, | |
| "loss": 0.9813, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 60.5, | |
| "learning_rate": 7.333333333333333e-06, | |
| "loss": 0.6174, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22058823529411764, | |
| "grad_norm": 63.0, | |
| "learning_rate": 9.885714285714287e-06, | |
| "loss": 0.5302, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 53.0, | |
| "learning_rate": 9.477551020408164e-06, | |
| "loss": 0.4742, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2985294117647059, | |
| "eval_accuracy": 0.7888735632183907, | |
| "eval_auc": 0.8929358689736048, | |
| "eval_f1": 0.7856609410007468, | |
| "eval_false_negatives": 562, | |
| "eval_false_positives": 1734, | |
| "eval_loss": 0.4537831246852875, | |
| "eval_precision": 0.7081790642881185, | |
| "eval_recall": 0.8821802935010482, | |
| "eval_runtime": 366.4094, | |
| "eval_samples_per_second": 29.68, | |
| "eval_specificity": 0.715970515970516, | |
| "eval_steps_per_second": 0.232, | |
| "eval_true_negatives": 4371, | |
| "eval_true_positives": 4208, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.2985294117647059, | |
| "step": 203, | |
| "train_accuracy": 0.811, | |
| "train_auc": 0.8978184538533006, | |
| "train_f1": 0.8152492668621701, | |
| "train_false_negatives": 46, | |
| "train_false_positives": 143, | |
| "train_loss": 0.44783344864845276, | |
| "train_precision": 0.7446428571428572, | |
| "train_recall": 0.9006479481641468, | |
| "train_runtime": 33.6776, | |
| "train_samples_per_second": 29.693, | |
| "train_specificity": 0.7337057728119181, | |
| "train_steps_per_second": 0.238, | |
| "train_true_negatives": 394, | |
| "train_true_positives": 417 | |
| }, | |
| { | |
| "epoch": 0.36764705882352944, | |
| "grad_norm": 28.5, | |
| "learning_rate": 9.069387755102042e-06, | |
| "loss": 0.4275, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 45.75, | |
| "learning_rate": 8.661224489795919e-06, | |
| "loss": 0.4036, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5147058823529411, | |
| "grad_norm": 51.5, | |
| "learning_rate": 8.253061224489797e-06, | |
| "loss": 0.3776, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 16.375, | |
| "learning_rate": 7.844897959183674e-06, | |
| "loss": 0.3343, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5970588235294118, | |
| "eval_accuracy": 0.8777931034482759, | |
| "eval_auc": 0.9478171481945067, | |
| "eval_f1": 0.8573881317737955, | |
| "eval_false_negatives": 775, | |
| "eval_false_positives": 554, | |
| "eval_loss": 0.2918412387371063, | |
| "eval_precision": 0.8782149923060013, | |
| "eval_recall": 0.8375262054507338, | |
| "eval_runtime": 366.4435, | |
| "eval_samples_per_second": 29.677, | |
| "eval_specificity": 0.9092547092547093, | |
| "eval_steps_per_second": 0.232, | |
| "eval_true_negatives": 5551, | |
| "eval_true_positives": 3995, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.5970588235294118, | |
| "step": 406, | |
| "train_accuracy": 0.903, | |
| "train_auc": 0.9626825785393716, | |
| "train_f1": 0.8843861740166865, | |
| "train_false_negatives": 59, | |
| "train_false_positives": 38, | |
| "train_loss": 0.24663816392421722, | |
| "train_precision": 0.9070904645476773, | |
| "train_recall": 0.8627906976744186, | |
| "train_runtime": 33.7261, | |
| "train_samples_per_second": 29.651, | |
| "train_specificity": 0.9333333333333333, | |
| "train_steps_per_second": 0.237, | |
| "train_true_negatives": 532, | |
| "train_true_positives": 371 | |
| }, | |
| { | |
| "epoch": 0.6617647058823529, | |
| "grad_norm": 42.0, | |
| "learning_rate": 7.436734693877552e-06, | |
| "loss": 0.3004, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": 15.875, | |
| "learning_rate": 7.028571428571429e-06, | |
| "loss": 0.2891, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8088235294117647, | |
| "grad_norm": 52.75, | |
| "learning_rate": 6.620408163265306e-06, | |
| "loss": 0.279, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 29.5, | |
| "learning_rate": 6.2122448979591845e-06, | |
| "loss": 0.2745, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8955882352941177, | |
| "eval_accuracy": 0.9005057471264368, | |
| "eval_auc": 0.9620825284976229, | |
| "eval_f1": 0.8891166222586596, | |
| "eval_false_negatives": 432, | |
| "eval_false_positives": 650, | |
| "eval_loss": 0.2513802945613861, | |
| "eval_precision": 0.8696872493985566, | |
| "eval_recall": 0.909433962264151, | |
| "eval_runtime": 366.5333, | |
| "eval_samples_per_second": 29.67, | |
| "eval_specificity": 0.8935298935298935, | |
| "eval_steps_per_second": 0.232, | |
| "eval_true_negatives": 5455, | |
| "eval_true_positives": 4338, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.8955882352941177, | |
| "step": 609, | |
| "train_accuracy": 0.914, | |
| "train_auc": 0.9714460029236588, | |
| "train_f1": 0.908315565031983, | |
| "train_false_negatives": 33, | |
| "train_false_positives": 53, | |
| "train_loss": 0.21909336745738983, | |
| "train_precision": 0.8893528183716075, | |
| "train_recall": 0.9281045751633987, | |
| "train_runtime": 33.7192, | |
| "train_samples_per_second": 29.657, | |
| "train_specificity": 0.9020332717190388, | |
| "train_steps_per_second": 0.237, | |
| "train_true_negatives": 488, | |
| "train_true_positives": 426 | |
| }, | |
| { | |
| "epoch": 0.9558823529411765, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 5.804081632653061e-06, | |
| "loss": 0.2479, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.0294117647058822, | |
| "grad_norm": 22.375, | |
| "learning_rate": 5.395918367346939e-06, | |
| "loss": 0.2153, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1029411764705883, | |
| "grad_norm": 31.75, | |
| "learning_rate": 4.987755102040817e-06, | |
| "loss": 0.1901, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 17.25, | |
| "learning_rate": 4.579591836734694e-06, | |
| "loss": 0.1702, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.1941176470588235, | |
| "eval_accuracy": 0.918528735632184, | |
| "eval_auc": 0.9723367106385974, | |
| "eval_f1": 0.905179794520548, | |
| "eval_false_negatives": 541, | |
| "eval_false_positives": 345, | |
| "eval_loss": 0.21427848935127258, | |
| "eval_precision": 0.9245736773065151, | |
| "eval_recall": 0.8865828092243186, | |
| "eval_runtime": 366.4767, | |
| "eval_samples_per_second": 29.674, | |
| "eval_specificity": 0.9434889434889435, | |
| "eval_steps_per_second": 0.232, | |
| "eval_true_negatives": 5760, | |
| "eval_true_positives": 4229, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.1941176470588235, | |
| "step": 812, | |
| "train_accuracy": 0.947, | |
| "train_auc": 0.9857519084432687, | |
| "train_f1": 0.9399773499433749, | |
| "train_false_negatives": 32, | |
| "train_false_positives": 21, | |
| "train_loss": 0.15449950098991394, | |
| "train_precision": 0.9518348623853211, | |
| "train_recall": 0.9284116331096197, | |
| "train_runtime": 33.6205, | |
| "train_samples_per_second": 29.744, | |
| "train_specificity": 0.9620253164556962, | |
| "train_steps_per_second": 0.238, | |
| "train_true_negatives": 532, | |
| "train_true_positives": 415 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 16.5, | |
| "learning_rate": 4.1714285714285715e-06, | |
| "loss": 0.1766, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.3235294117647058, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 3.7632653061224494e-06, | |
| "loss": 0.1553, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.3970588235294117, | |
| "grad_norm": 19.625, | |
| "learning_rate": 3.3551020408163272e-06, | |
| "loss": 0.1473, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.4705882352941178, | |
| "grad_norm": 24.125, | |
| "learning_rate": 2.946938775510204e-06, | |
| "loss": 0.153, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4926470588235294, | |
| "eval_accuracy": 0.9233103448275862, | |
| "eval_auc": 0.9760859143878011, | |
| "eval_f1": 0.9123028391167193, | |
| "eval_false_negatives": 432, | |
| "eval_false_positives": 402, | |
| "eval_loss": 0.1964733898639679, | |
| "eval_precision": 0.9151898734177215, | |
| "eval_recall": 0.909433962264151, | |
| "eval_runtime": 366.6051, | |
| "eval_samples_per_second": 29.664, | |
| "eval_specificity": 0.9341523341523341, | |
| "eval_steps_per_second": 0.232, | |
| "eval_true_negatives": 5703, | |
| "eval_true_positives": 4338, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.4926470588235294, | |
| "step": 1015, | |
| "train_accuracy": 0.961, | |
| "train_auc": 0.9915747155931913, | |
| "train_f1": 0.9545983701979045, | |
| "train_false_negatives": 23, | |
| "train_false_positives": 16, | |
| "train_loss": 0.1129259318113327, | |
| "train_precision": 0.9624413145539906, | |
| "train_recall": 0.9468822170900693, | |
| "train_runtime": 33.7414, | |
| "train_samples_per_second": 29.637, | |
| "train_specificity": 0.9717813051146384, | |
| "train_steps_per_second": 0.237, | |
| "train_true_negatives": 551, | |
| "train_true_positives": 410 | |
| }, | |
| { | |
| "epoch": 1.5441176470588234, | |
| "grad_norm": 14.875, | |
| "learning_rate": 2.5387755102040816e-06, | |
| "loss": 0.1513, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.6176470588235294, | |
| "grad_norm": 20.375, | |
| "learning_rate": 2.1306122448979595e-06, | |
| "loss": 0.1593, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6911764705882353, | |
| "grad_norm": 16.5, | |
| "learning_rate": 1.722448979591837e-06, | |
| "loss": 0.1478, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 13.25, | |
| "learning_rate": 1.3142857142857143e-06, | |
| "loss": 0.1399, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.7911764705882351, | |
| "eval_accuracy": 0.9249655172413793, | |
| "eval_auc": 0.9771551482872238, | |
| "eval_f1": 0.9136507936507936, | |
| "eval_false_negatives": 453, | |
| "eval_false_positives": 363, | |
| "eval_loss": 0.19318822026252747, | |
| "eval_precision": 0.9224358974358975, | |
| "eval_recall": 0.9050314465408805, | |
| "eval_runtime": 366.7824, | |
| "eval_samples_per_second": 29.65, | |
| "eval_specificity": 0.9405405405405406, | |
| "eval_steps_per_second": 0.232, | |
| "eval_true_negatives": 5742, | |
| "eval_true_positives": 4317, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 1.7911764705882351, | |
| "step": 1218, | |
| "train_accuracy": 0.957, | |
| "train_auc": 0.9929256882262804, | |
| "train_f1": 0.9502890173410404, | |
| "train_false_negatives": 26, | |
| "train_false_positives": 17, | |
| "train_loss": 0.11246936023235321, | |
| "train_precision": 0.9602803738317757, | |
| "train_recall": 0.9405034324942791, | |
| "train_runtime": 33.6494, | |
| "train_samples_per_second": 29.718, | |
| "train_specificity": 0.9698046181172292, | |
| "train_steps_per_second": 0.238, | |
| "train_true_negatives": 546, | |
| "train_true_positives": 411 | |
| }, | |
| { | |
| "epoch": 1.8382352941176472, | |
| "grad_norm": 22.25, | |
| "learning_rate": 9.061224489795919e-07, | |
| "loss": 0.1532, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.9117647058823528, | |
| "grad_norm": 13.375, | |
| "learning_rate": 4.979591836734694e-07, | |
| "loss": 0.1489, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9852941176470589, | |
| "grad_norm": 18.5, | |
| "learning_rate": 8.979591836734695e-08, | |
| "loss": 0.1443, | |
| "step": 1350 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1360, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 203, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.460630510863122e+18, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |