| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.99812382739212, | |
| "eval_steps": 100, | |
| "global_step": 133, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 90.7151068329811, | |
| "epoch": 0.0375234521575985, | |
| "grad_norm": 2.802288055419922, | |
| "kl": 0.01843569278717041, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.0016, | |
| "reward": 0.061718751792795956, | |
| "reward_std": 0.13259942815639078, | |
| "rewards/accuracy_reward": 0.016406250395812096, | |
| "rewards/format_reward": 0.026302084024064244, | |
| "rewards/relaxed_accuracy_reward": 0.019010417140088977, | |
| "step": 5 | |
| }, | |
| { | |
| "completion_length": 52.738803672790525, | |
| "epoch": 0.075046904315197, | |
| "grad_norm": 4.877239227294922, | |
| "kl": 0.3423828125, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.0019, | |
| "reward": 0.9867187785916031, | |
| "reward_std": 0.4639298491179943, | |
| "rewards/accuracy_reward": 0.13906250363215805, | |
| "rewards/format_reward": 0.6864583493210376, | |
| "rewards/relaxed_accuracy_reward": 0.16119792128447444, | |
| "step": 10 | |
| }, | |
| { | |
| "completion_length": 146.94818139076233, | |
| "epoch": 0.1125703564727955, | |
| "grad_norm": 2.6658596992492676, | |
| "kl": 0.64903564453125, | |
| "learning_rate": 1.9996515418688493e-05, | |
| "loss": 0.0405, | |
| "reward": 1.1822917029261588, | |
| "reward_std": 0.4913041713181883, | |
| "rewards/accuracy_reward": 0.14843750416766852, | |
| "rewards/format_reward": 0.8632812697440386, | |
| "rewards/relaxed_accuracy_reward": 0.17057292158715426, | |
| "step": 15 | |
| }, | |
| { | |
| "completion_length": 47.01458476781845, | |
| "epoch": 0.150093808630394, | |
| "grad_norm": 1.1637159585952759, | |
| "kl": 0.74886474609375, | |
| "learning_rate": 1.9874809871741877e-05, | |
| "loss": 0.025, | |
| "reward": 1.0914062693715096, | |
| "reward_std": 0.19755753134377302, | |
| "rewards/accuracy_reward": 0.05182291832752526, | |
| "rewards/format_reward": 0.9742187555879355, | |
| "rewards/relaxed_accuracy_reward": 0.06536458558402955, | |
| "step": 20 | |
| }, | |
| { | |
| "completion_length": 113.31823229789734, | |
| "epoch": 0.18761726078799248, | |
| "grad_norm": 6.727182388305664, | |
| "kl": 0.301470947265625, | |
| "learning_rate": 1.9581296124106682e-05, | |
| "loss": 0.0255, | |
| "reward": 1.4166667103767394, | |
| "reward_std": 0.39920547502115367, | |
| "rewards/accuracy_reward": 0.18359375651925802, | |
| "rewards/format_reward": 0.9898437581956386, | |
| "rewards/relaxed_accuracy_reward": 0.24322917410172523, | |
| "step": 25 | |
| }, | |
| { | |
| "completion_length": 65.79661650806665, | |
| "epoch": 0.225140712945591, | |
| "grad_norm": 3.8793492317199707, | |
| "kl": 12.40894775390625, | |
| "learning_rate": 1.912108091398988e-05, | |
| "loss": 0.4861, | |
| "reward": 0.8789062738418579, | |
| "reward_std": 0.249878820637241, | |
| "rewards/accuracy_reward": 0.12656250381842254, | |
| "rewards/format_reward": 0.5846354231238365, | |
| "rewards/relaxed_accuracy_reward": 0.16770833763293921, | |
| "step": 30 | |
| }, | |
| { | |
| "completion_length": 65.01015776395798, | |
| "epoch": 0.2626641651031895, | |
| "grad_norm": 127.46144104003906, | |
| "kl": 0.706707763671875, | |
| "learning_rate": 1.8502171357296144e-05, | |
| "loss": 0.0008, | |
| "reward": 0.4466145946178585, | |
| "reward_std": 0.16547852829098703, | |
| "rewards/accuracy_reward": 0.02786458395421505, | |
| "rewards/format_reward": 0.3843750098953024, | |
| "rewards/relaxed_accuracy_reward": 0.034375000698491934, | |
| "step": 35 | |
| }, | |
| { | |
| "completion_length": 44.66328253149986, | |
| "epoch": 0.300187617260788, | |
| "grad_norm": 1.7230250835418701, | |
| "kl": 1.5087890625, | |
| "learning_rate": 1.773533563475053e-05, | |
| "loss": 0.0685, | |
| "reward": 1.279427120834589, | |
| "reward_std": 0.3529853185173124, | |
| "rewards/accuracy_reward": 0.13802083663176745, | |
| "rewards/format_reward": 0.9729166731238366, | |
| "rewards/relaxed_accuracy_reward": 0.16848958830814809, | |
| "step": 40 | |
| }, | |
| { | |
| "completion_length": 18.572396397590637, | |
| "epoch": 0.33771106941838647, | |
| "grad_norm": 197.8912811279297, | |
| "kl": 1.648193359375, | |
| "learning_rate": 1.6833915640265485e-05, | |
| "loss": 0.0695, | |
| "reward": 1.4171875409781933, | |
| "reward_std": 0.25054811174049973, | |
| "rewards/accuracy_reward": 0.18385417158715428, | |
| "rewards/format_reward": 0.9942708384245634, | |
| "rewards/relaxed_accuracy_reward": 0.23906250612344593, | |
| "step": 45 | |
| }, | |
| { | |
| "completion_length": 36.354167491197586, | |
| "epoch": 0.37523452157598497, | |
| "grad_norm": 2.2742254734039307, | |
| "kl": 4.847802734375, | |
| "learning_rate": 1.58135948502146e-05, | |
| "loss": 0.2069, | |
| "reward": 1.3265625357627868, | |
| "reward_std": 0.322022933838889, | |
| "rewards/accuracy_reward": 0.1640625043073669, | |
| "rewards/format_reward": 0.9533854331821203, | |
| "rewards/relaxed_accuracy_reward": 0.2091145885642618, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 32.32109480500221, | |
| "epoch": 0.41275797373358347, | |
| "grad_norm": 1.68628990650177, | |
| "kl": 2.0571533203125, | |
| "learning_rate": 1.4692125452370664e-05, | |
| "loss": 0.07, | |
| "reward": 1.4083333723247051, | |
| "reward_std": 0.32693559252656995, | |
| "rewards/accuracy_reward": 0.18463542151730508, | |
| "rewards/format_reward": 0.9885416757315397, | |
| "rewards/relaxed_accuracy_reward": 0.23515625537838786, | |
| "step": 55 | |
| }, | |
| { | |
| "completion_length": 108.25755517482757, | |
| "epoch": 0.450281425891182, | |
| "grad_norm": 1.052840232849121, | |
| "kl": 0.5584716796875, | |
| "learning_rate": 1.348901948209167e-05, | |
| "loss": 0.0306, | |
| "reward": 1.3903646256774664, | |
| "reward_std": 0.40279707135632636, | |
| "rewards/accuracy_reward": 0.185156254703179, | |
| "rewards/format_reward": 0.9695312656462193, | |
| "rewards/relaxed_accuracy_reward": 0.23567708909977228, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 56.58411636352539, | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 1.2224760055541992, | |
| "kl": 0.54796142578125, | |
| "learning_rate": 1.2225209339563144e-05, | |
| "loss": 0.0342, | |
| "reward": 1.4695312902331352, | |
| "reward_std": 0.36167940208688376, | |
| "rewards/accuracy_reward": 0.21458333889022468, | |
| "rewards/format_reward": 0.9908854246139527, | |
| "rewards/relaxed_accuracy_reward": 0.2640625067986548, | |
| "step": 65 | |
| }, | |
| { | |
| "completion_length": 61.634637117385864, | |
| "epoch": 0.525328330206379, | |
| "grad_norm": 3.6565780639648438, | |
| "kl": 0.692724609375, | |
| "learning_rate": 1.092268359463302e-05, | |
| "loss": 0.0236, | |
| "reward": 1.4825521290302277, | |
| "reward_std": 0.305070091644302, | |
| "rewards/accuracy_reward": 0.2138020895421505, | |
| "rewards/format_reward": 0.9934895880520344, | |
| "rewards/relaxed_accuracy_reward": 0.2752604250796139, | |
| "step": 70 | |
| }, | |
| { | |
| "completion_length": 84.00182542800903, | |
| "epoch": 0.5628517823639775, | |
| "grad_norm": 2.023277997970581, | |
| "kl": 0.6501220703125, | |
| "learning_rate": 9.604104415737309e-06, | |
| "loss": 0.0362, | |
| "reward": 1.452604204416275, | |
| "reward_std": 0.35850795153528453, | |
| "rewards/accuracy_reward": 0.21250000558793544, | |
| "rewards/format_reward": 0.9677083436399698, | |
| "rewards/relaxed_accuracy_reward": 0.2723958401707932, | |
| "step": 75 | |
| }, | |
| { | |
| "completion_length": 84.90573143959045, | |
| "epoch": 0.600375234521576, | |
| "grad_norm": 4.130603313446045, | |
| "kl": 0.583837890625, | |
| "learning_rate": 8.292413279130625e-06, | |
| "loss": 0.0256, | |
| "reward": 1.0875000283122063, | |
| "reward_std": 0.6249250227585434, | |
| "rewards/accuracy_reward": 0.1419270873069763, | |
| "rewards/format_reward": 0.7656250182539225, | |
| "rewards/relaxed_accuracy_reward": 0.1799479213077575, | |
| "step": 80 | |
| }, | |
| { | |
| "completion_length": 64.10599145889282, | |
| "epoch": 0.6378986866791745, | |
| "grad_norm": 1.4160608053207397, | |
| "kl": 0.508251953125, | |
| "learning_rate": 7.010431818542298e-06, | |
| "loss": 0.0107, | |
| "reward": 1.1182292014360429, | |
| "reward_std": 0.46313118319958446, | |
| "rewards/accuracy_reward": 0.10078125314321369, | |
| "rewards/format_reward": 0.8817708536982536, | |
| "rewards/relaxed_accuracy_reward": 0.13567708695773034, | |
| "step": 85 | |
| }, | |
| { | |
| "completion_length": 64.69922063350677, | |
| "epoch": 0.6754221388367729, | |
| "grad_norm": 1.4829213619232178, | |
| "kl": 0.46568603515625, | |
| "learning_rate": 5.780464759928623e-06, | |
| "loss": 0.0158, | |
| "reward": 1.262760452926159, | |
| "reward_std": 0.3677686099894345, | |
| "rewards/accuracy_reward": 0.12369792014360428, | |
| "rewards/format_reward": 0.9700520999729634, | |
| "rewards/relaxed_accuracy_reward": 0.16901042116805912, | |
| "step": 90 | |
| }, | |
| { | |
| "completion_length": 77.71198177337646, | |
| "epoch": 0.7129455909943715, | |
| "grad_norm": 1.331533670425415, | |
| "kl": 0.4433837890625, | |
| "learning_rate": 4.623911849714226e-06, | |
| "loss": 0.023, | |
| "reward": 1.3916667073965072, | |
| "reward_std": 0.3710032233502716, | |
| "rewards/accuracy_reward": 0.17578125505242498, | |
| "rewards/format_reward": 0.9848958436399698, | |
| "rewards/relaxed_accuracy_reward": 0.2309895897982642, | |
| "step": 95 | |
| }, | |
| { | |
| "completion_length": 97.82890903949738, | |
| "epoch": 0.7504690431519699, | |
| "grad_norm": 1.2931978702545166, | |
| "kl": 0.49661865234375, | |
| "learning_rate": 3.560895528440844e-06, | |
| "loss": 0.0402, | |
| "reward": 1.4742187947034835, | |
| "reward_std": 0.37555707613937556, | |
| "rewards/accuracy_reward": 0.21562500512227417, | |
| "rewards/format_reward": 0.9763020973652601, | |
| "rewards/relaxed_accuracy_reward": 0.282291673310101, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7504690431519699, | |
| "eval_completion_length": 98.48214570155127, | |
| "eval_kl": 0.5256492269163763, | |
| "eval_loss": 0.028766795992851257, | |
| "eval_reward": 1.4547038757427229, | |
| "eval_reward_std": 0.4197673304882614, | |
| "eval_rewards/accuracy_reward": 0.21239838048244603, | |
| "eval_rewards/format_reward": 0.9732868901943911, | |
| "eval_rewards/relaxed_accuracy_reward": 0.26901859094353087, | |
| "eval_runtime": 3804.2499, | |
| "eval_samples_per_second": 0.301, | |
| "eval_steps_per_second": 0.075, | |
| "step": 100 | |
| }, | |
| { | |
| "completion_length": 99.60547137260437, | |
| "epoch": 0.7879924953095685, | |
| "grad_norm": 1.4268443584442139, | |
| "kl": 0.5451171875, | |
| "learning_rate": 2.6099108277934105e-06, | |
| "loss": 0.0419, | |
| "reward": 1.4901042107492686, | |
| "reward_std": 0.4182614594232291, | |
| "rewards/accuracy_reward": 0.23645834045019, | |
| "rewards/format_reward": 0.9674479342997074, | |
| "rewards/relaxed_accuracy_reward": 0.2861979248933494, | |
| "step": 105 | |
| }, | |
| { | |
| "completion_length": 96.49583611488342, | |
| "epoch": 0.8255159474671669, | |
| "grad_norm": 1.0921823978424072, | |
| "kl": 0.50526123046875, | |
| "learning_rate": 1.7875035823168641e-06, | |
| "loss": 0.0214, | |
| "reward": 1.4427083771675826, | |
| "reward_std": 0.3818355408497155, | |
| "rewards/accuracy_reward": 0.2046875060768798, | |
| "rewards/format_reward": 0.9671875163912773, | |
| "rewards/relaxed_accuracy_reward": 0.27083334026392547, | |
| "step": 110 | |
| }, | |
| { | |
| "completion_length": 90.97396085262298, | |
| "epoch": 0.8630393996247655, | |
| "grad_norm": 1.486311674118042, | |
| "kl": 0.50943603515625, | |
| "learning_rate": 1.1079825545001887e-06, | |
| "loss": 0.0204, | |
| "reward": 1.4427083786576986, | |
| "reward_std": 0.37329327603802087, | |
| "rewards/accuracy_reward": 0.20286458970513194, | |
| "rewards/format_reward": 0.9742187630385161, | |
| "rewards/relaxed_accuracy_reward": 0.26562500847503545, | |
| "step": 115 | |
| }, | |
| { | |
| "completion_length": 86.1117213010788, | |
| "epoch": 0.900562851782364, | |
| "grad_norm": 2.1096079349517822, | |
| "kl": 0.4741455078125, | |
| "learning_rate": 5.831704818578842e-07, | |
| "loss": 0.0203, | |
| "reward": 1.4510417070239783, | |
| "reward_std": 0.3531476927921176, | |
| "rewards/accuracy_reward": 0.21015625533182175, | |
| "rewards/format_reward": 0.9757812641561031, | |
| "rewards/relaxed_accuracy_reward": 0.2651041731471196, | |
| "step": 120 | |
| }, | |
| { | |
| "completion_length": 80.04765882492066, | |
| "epoch": 0.9380863039399625, | |
| "grad_norm": 1.8568741083145142, | |
| "kl": 0.51519775390625, | |
| "learning_rate": 2.2219837744959284e-07, | |
| "loss": 0.0225, | |
| "reward": 1.507031300663948, | |
| "reward_std": 0.4000093450304121, | |
| "rewards/accuracy_reward": 0.2270833398681134, | |
| "rewards/format_reward": 0.9763020988553762, | |
| "rewards/relaxed_accuracy_reward": 0.3036458430346102, | |
| "step": 125 | |
| }, | |
| { | |
| "completion_length": 80.31172132492065, | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 7.431880474090576, | |
| "kl": 0.50054931640625, | |
| "learning_rate": 3.134666272774034e-08, | |
| "loss": 0.013, | |
| "reward": 1.4830729607492685, | |
| "reward_std": 0.3494715398177505, | |
| "rewards/accuracy_reward": 0.2234375062864274, | |
| "rewards/format_reward": 0.9770833492279053, | |
| "rewards/relaxed_accuracy_reward": 0.2825520922895521, | |
| "step": 130 | |
| }, | |
| { | |
| "completion_length": 80.43663430213928, | |
| "epoch": 0.99812382739212, | |
| "kl": 0.5088704427083334, | |
| "reward": 1.4366319850087166, | |
| "reward_std": 0.36855422138857347, | |
| "rewards/accuracy_reward": 0.194444449346823, | |
| "rewards/format_reward": 0.9687500180055698, | |
| "rewards/relaxed_accuracy_reward": 0.2734375084983185, | |
| "step": 133, | |
| "total_flos": 0.0, | |
| "train_loss": 0.051685971918662914, | |
| "train_runtime": 185838.8689, | |
| "train_samples_per_second": 0.092, | |
| "train_steps_per_second": 0.001 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 133, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 15, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |